diff --git a/Makefile b/Makefile index 52b5979..0175cde 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ TARGET = cmsg -OBJECTS = main.o harness.o net.o util.o relay.o dataq.o +OBJECTS = main.o harness.o net.o util.o relay.o hashtable.o dataq.o sexp.o sexpio.o CFLAGS = -D_XOPEN_SOURCE=600 -Wall -O0 -g #CFLAGS = -D_XOPEN_SOURCE=600 -Wall -O3 diff --git a/Sexp.txt b/Sexp.txt new file mode 100644 index 0000000..caf4542 --- /dev/null +++ b/Sexp.txt @@ -0,0 +1,699 @@ +Network Working Group R. Rivest +Internet Draft May 4, 1997 +Expires November 4, 1997 + + + S-Expressions + draft-rivest-sexp-00.txt + + +Status of this Memo + + Distribution of this memo is unlimited. + + This document is an Internet-Draft. Internet Drafts are working + documents of the Internet Engineering Task Force (IETF), its Areas, + and its Working Groups. Note that other groups may also distribute + working documents as Internet Drafts. + + Internet Drafts are draft documents valid for a maximum of six + months, and may be updated, replaced, or obsoleted by other documents + at any time. It is not appropriate to use Internet Drafts as + reference material, or to cite them other than as a ``working draft'' + or ``work in progress.'' + + To learn the current status of any Internet-Draft, please check the + ``1id-abstracts.txt'' listing contained in the internet-drafts Shadow + Directories on: ftp.is.co.za (Africa), nic.nordu.net (Europe), + ds.internic.net (US East Coast), ftp.isi.edu (US West Coast), + or munnari.oz.au (Pacific Rim) + + +Abstract + +This memo describes a data structure called "S-expressions" that are +suitable for representing arbitrary complex data structures. We make +precise the encodings of S-expressions: we give a "canonical form" for +S-expressions, described two "transport" representations, and also +describe an "advanced" format for display to people. + + + +1. Introduction + +S-expressions are data structures for representing complex data. 
They +are either byte-strings ("octet-strings") or lists of simpler +S-expressions. Here is a sample S-expression: + + (snicker "abc" (#03# |YWJj|)) + +It is a list of length three: + + -- the octet-string "snicker" + + -- the octet-string "abc" + + -- a sub-list containing two elements: + - the hexadecimal constant #03# + - the base-64 constant |YWJj| (which is the same as "abc") + +This note gives a specific proposal for constructing and utilizing +S-expressions. The proposal is independent of any particular application. + +Here are the design goals for S-expressions: + + -- generality: S-expressions should be good at representing arbitrary + data. + + -- readability: it should be easy for someone to examine and + understand the structure of an S-expression. + + -- economy: S-expressions should represent data compactly. + + -- transportability: S-expressions should be easy to transport + over communication media (such as email) that are known to be + less than perfect. + + -- flexibility: S-expressions should make it relatively simple to + modify and extend data structures. + + -- canonicalization: it should be easy to produce a unique + "canonical" form of an S-expression, for digital signature purposes. + + -- efficiency: S-expressions should admit in-memory representations + that allow efficient processing. + + +Section 2 gives an introduction to S-expressions. +Section 3 discusses the character sets used. +Section 4 presents the various representations of octet-strings. +Section 5 describes how to represent lists. +Section 6 discusses how S-expressions are represented for various uses. +Section 7 gives a BNF syntax for S-expressions. +Section 8 talks about how S-expressions might be represented in memory. +Section 9 briefly describes implementations for handling S-expressions. +Section 10 discusses how applications might utilize S-expressions. +Section 11 gives historical notes on S-expressions. +Section 12 gives references. + +2. 
S-expressions -- informal introduction + +Informally, an S-expression is either: + -- an octet-string, or + -- a finite list of simpler S-expressions. + +An octet-string is a finite sequence of eight-bit octets. There may be +many different but equivalent ways of representing an octet-string + + abc -- as a token + + "abc" -- as a quoted string + + #616263# -- as a hexadecimal string + + 3:abc -- as a length-prefixed "verbatim" encoding + + {MzphYmM=} -- as a base-64 encoding of the verbatim encoding + (that is, an encoding of "3:abc") + + |YWJj| -- as a base-64 encoding of the octet-string "abc" + +These encodings are all equivalent; they all denote the same octet string. + +We will give details of these encodings later on, and also describe how to +give a "display type" to a byte string. + +A list is a finite sequence of zero or more simpler S-expressions. A list +may be represented by using parentheses to surround the sequence of encodings +of its elements, as in: + + (abc (de #6667#) "ghi jkl") + +As we see, there is variability possible in the encoding of an +S-expression. In some cases, it is desirable to standardize or +restrict the encodings; in other cases it is desirable to have no +restrictions. The following are the target cases we aim to handle: + + -- a "transport" encoding for transporting the S-expression between + computers. + + -- a "canonical" encoding, used when signing the S-expression. + + -- an "advanced" encoding used for input/output to people. + + -- an "in-memory" encoding used for processing the S-expression in + the computer. + +These need not be different; in this proposal the canonical encoding +is the same as the transport encoding, for example. In this note we +propose (related) encoding techniques for each of these uses. + +3. Character set + +We will be describing encodings of S-expressions. Except when giving +"verbatim" encodings, the character set used is limited to the following +characters in US-ASCII: + Alphabetic: A B ... 
Z a b ... z + numeric: 0 1 ... 9 + whitespace: space, horizontal tab, vertical tab, form-feed + carriage-return, line-feed + The following graphics characters, which we call "pseudo-alphabetic": + - hyphen or minus + . period + / slash + _ underscore + : colon + * asterisk + + plus + = equal + The following graphics characters, which are "reserved punctuation": + ( left parenthesis + ) right parenthesis + [ left bracket + ] right bracket + { left brace + } right brace + | vertical bar + # number sign + " double quote + & ampersand + \ backslash + The following characters are unused and unavailable, except in + "verbatim" encodings: + ! exclamation point + % percent + ^ circumflex + ~ tilde + ; semicolon + ' apostrophe + , comma + < less than + > greater than + ? question mark + + +4. Octet string representations + +This section describes in detail the ways in which an octet-string may +be represented. + +We recall that an octet-string is any finite sequence of octets, and +that the octet-string may have length zero. + + +4.1 Verbatim representation + +A verbatim encoding of an octet string consists of three parts: + + -- the length (number of octets) of the octet-string, + given in decimal most significant digit first, with + no leading zeros. + + -- a colon ":" + + -- the octet string itself, verbatim. + +There are no blanks or whitespace separating the parts. No "escape +sequences" are interpreted in the octet string. This encoding is also +called a "binary" or "raw" encoding. + +Here are some sample verbatim encodings: + + 3:abc + 7:subject + 4::::: + 12:hello world! + 10:abcdefghij + 0: + +4.2 Quoted-string representation + +The quoted-string representation of an octet-string consists of: + + -- an optional decimal length field + + -- an initial double-quote (") + + -- the octet string with "C" escape conventions (\n,etc) + + -- a final double-quote (") + +The specified length is the length of the resulting string after any +escape sequences have been handled. 
The string does not have any +"terminating NULL" that C includes, and the length does not count such +a character. + +The length is optional. + +The escape conventions within the quoted string are as follows (these follow +the "C" programming language conventions, with an extension for +ignoring line terminators of just LF or CRLF): + \b -- backspace + \t -- horizontal tab + \v -- vertical tab + \n -- new-line + \f -- form-feed + \r -- carriage-return + \" -- double-quote + \' -- single-quote + \\ -- back-slash + \ooo -- character with octal value ooo (all three digits + must be present) + \xhh -- character with hexadecimal value hh (both digits + must be present) + \<carriage-return> -- causes carriage-return to be ignored. + \<line-feed> -- causes linefeed to be ignored. + \<carriage-return><line-feed> -- causes CRLF to be ignored. + \<line-feed><carriage-return> -- causes LFCR to be ignored. + +Here are some examples of quoted-string encodings: + + "subject" + "hi there" + 7"subject" + 3"\n\n\n" + "This has\n two lines." + "This has\ + one." + "" + +4.3 Token representation + +An octet string that meets the following conditions may be given +directly as a "token". + + -- it does not begin with a digit + + -- it contains only characters that are + -- alphabetic (upper or lower case), + -- numeric, or + -- one of the eight "pseudo-alphabetic" punctuation marks: + - . / _ : * + = + (Note: upper and lower case are not equivalent.) + (Note: A token may begin with punctuation, including ":"). + +Here are some examples of token representations: + + subject + not-before + class-of-1997 + //microsoft.com/names/smith + * + + +4.4 Hexadecimal representation + +An octet-string may be represented with a hexadecimal encoding consisting of: + + -- an (optional) decimal length of the octet string + + -- a sharp-sign "#" + + -- a hexadecimal encoding of the octet string, with each octet + represented with two hexadecimal digits, most significant + digit first. 
+ + -- a sharp-sign "#" + +There may be whitespace inserted in the midst of the hexadecimal +encoding arbitrarily; it is ignored. It is an error to have +characters other than whitespace and hexadecimal digits. + +Here are some examples of hexadecimal encodings: + + #616263# -- represents "abc" + 3#616263# -- also represents "abc" + # 616 + 263 # -- also represents "abc" + + +4.5 Base-64 representation + +An octet-string may be represented in a base-64 coding consisting of: + + -- an (optional) decimal length of the octet string + + -- a vertical bar "|" + + -- the rfc 1521 base-64 encoding of the octet string. + + -- a final vertical bar "|" + +The base-64 encoding uses only the characters + A-Z a-z 0-9 + / = +It produces four characters of output for each three octets of input. +If the input has one or two left-over octets of input, it produces an +output block of length four ending in two or one equals signs, respectively. +Output routines compliant with this standard MUST output the equals signs +as specified. Input routines MAY accept inputs where the equals signs are +dropped. + +There may be whitespace inserted in the midst of the base-64 encoding +arbitrarily; it is ignored. It is an error to have characters other +than whitespace and base-64 characters. + +Here are some examples of base-64 encodings: + + |YWJj| -- represents "abc" + | Y W + J j | -- also represents "abc" + 3|YWJj| -- also represents "abc" + |YWJjZA==| -- represents "abcd" + |YWJjZA| -- also represents "abcd" + + +4.6 Display hint + +Any octet string may be preceded by a single "display hint". + +The purpose of the display hint is to provide information on how +to display the octet string to a user. It has no other function. +Many of the MIME types work here. + +A display-hint is an octet string surrounded by square brackets. +There may be whitespace separating the octet string from the +surrounding brackets. Any of the legal formats may be used for the +octet string. 
+ +Here are some examples of display-hints: + + [image/gif] + [URI] + [charset=unicode-1-1] + [text/richtext] + [application/postscript] + [audio/basic] + ["http://abc.com/display-types/funky.html"] + +In applications an octet-string that is untyped may be considered to have +a pre-specified "default" mime type. The mime type + "text/plain; charset=iso-8859-1" +is the standard default. + + +4.7 Equality of octet-strings + +Two octet strings are considered to be "equal" if and only if they +have the same display hint and the same data octet strings. + +Note that octet-strings are "case-sensitive"; the octet-string "abc" +is not equal to the octet-string "ABC". + +An untyped octet-string can be compared to another octet-string (typed +or not) by considering it as a typed octet-string with the default +mime-type. + + +5. Lists + +Just as with octet-strings, there are several ways to represent an +S-expression. Whitespace may be used to separate list elements, but +they are only required to separate two octet strings when otherwise +the two octet strings might be interpreted as one, as when one token +follows another. Also, whitespace may follow the initial left +parenthesis, or precede the final right parenthesis. + +Here are some examples of encodings of lists: + + (a b c) + + ( a ( b c ) ( ( d e ) ( e f ) ) ) + + (11:certificate(6:issuer3:bob)(7:subject5:alice)) + + ({3Rt=} "1997" murphy 3:{XC++}) + + +6. Representation types + +There are three "types" of representations: + + -- canonical + + -- basic transport + + -- advanced transport + +The first two MUST be supported by any implementation; the last is +optional. + + +6.1 Canonical representation + +This canonical representation is used for digital signature purposes, +transmission, etc. It is uniquely defined for each S-expression. It +is not particularly readable, but that is not the point. It is +intended to be very easy to parse, to be reasonably economical, and to +be unique for any S-expression. 
+ +The "canonical" form of an S-expression represents each octet-string +in verbatim mode, and represents each list with no blanks separating +elements from each other or from the surrounding parentheses. + +Here are some examples of canonical representations of S-expressions: + + (6:issuer3:bob) + + (4:icon[12:image/bitmap]9:xxxxxxxxx) + + (7:subject(3:ref5:alice6:mother)) + + +6.2 Basic transport representation + +There are two forms of the "basic transport" representation: + + -- the canonical representation + + -- an rfc-2045 base-64 representation of the canonical representation, + surrounded by braces. + +The transport mechanism is intended to provide a universal means of +representing S-expressions for transport from one machine to another. + +Here are some examples of an S-expression represented in basic +transport mode: + + (1:a1:b1:c) + + {KDE6YTE6YjE6YykA} + + (this is the same S-expression encoded in base-64) + +There is a difference between the brace notation for base-64 used here +and the || notation for base-64'd octet-strings described above. Here +the base-64 contents are converted to octets, and then re-scanned as +if they were given originally as octets. With the || notation, the +contents are just turned into an octet-string. + + +6.3 Advanced transport representation + +The "advanced transport" representation is intended to provide more +flexible and readable notations for documentation, design, debugging, +and (in some cases) user interface. + +The advanced transport representation allows all of the representation +forms described above, including quoted strings, base-64 and hexadecimal +representation of strings, tokens, representations of strings with +omitted lengths, and so on. + + +7. BNF for syntax + +We give separate BNF's for canonical and advanced forms of S-expressions. +We use the following notation: + <x>* means 0 or more occurrences of <x> + <x>+ means 1 or more occurrences of <x> + <x>? 
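means 0 or 1 occurrences of <x> + parentheses are used for grouping, as in ( <x> | <y> )* + +For canonical and basic transport: + +<sexp> :: <string> | <list> ; +<string> :: <display>? <simple-string> ; +<simple-string> :: <raw> ; +<display> :: "[" <simple-string> "]" ; +<raw> :: <decimal> ":" <bytes> ; +<decimal> :: <decimal-digit>+ ; + -- decimal numbers should have no unnecessary leading zeros +<bytes> -- any string of bytes, of the indicated length +<list> :: "(" <sexp>* ")" ; +<decimal-digit> :: "0" | ... | "9" ; + +For advanced transport: + +<sexp> :: <string> | <list> ; +<string> :: <display>? <simple-string> ; +<simple-string> :: <raw> | <token> | <base-64> | <hexadecimal> | + <quoted-string> ; +<display> :: "[" <simple-string> "]" ; +<raw> :: <decimal> ":" <bytes> ; +<decimal> :: <decimal-digit>+ ; + -- decimal numbers should have no unnecessary leading zeros +<bytes> -- any string of bytes, of the indicated length +<token> :: <token-char>+ ; +<base-64> :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ; +<hexadecimal> :: "#" ( <hex-digit> | <whitespace> )* "#" ; +<quoted-string> :: <decimal>? <quoted-string-body> ; +<quoted-string-body> :: "\"" <bytes> "\"" ; +<list> :: "(" ( <sexp> | <whitespace> )* ")" ; +<whitespace> :: <whitespace-char>* ; +<token-char> :: <alpha> | <decimal-digit> | <simple-punc> ; +<alpha> :: <upper-case> | <lower-case> | <decimal-digit> ; +<lower-case> :: "a" | ... | "z" ; +<upper-case> :: "A" | ... | "Z" ; +<decimal-digit> :: "0" | ... | "9" ; +<hex-digit> :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ; +<simple-punc> :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ; +<whitespace-char> :: " " | "\t" | "\r" | "\n" ; +<base-64-char> :: <alpha> | <decimal-digit> | "+" | "/" | "=" ; +<null> :: "" ; + +8. In-memory representations + +For processing, the S-expression would typically be parsed and represented +in memory in a form more amenable to efficient processing. We suggest +two alternatives: + + -- "list-structure" + + -- "array-layout" + +We only sketch these here, as they are only suggestive. The code referenced +below illustrates these styles in more detail. + + +8.1. List-structure memory representation + +Here there are separate records for simple-strings, strings, and +lists. An S-expression of the form ("abc" "de") would require two +records for the simple strings, two for the strings, and two for the +list elements. This is a fairly conventional representation, and +details are omitted here. + +8.2 Array-layout memory representation + +Here each S-expression is represented as a contiguous array of bytes. 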
+The first byte codes the "type" of the S-expression: + + 01 octet-string + + 02 octet-string with display-hint + + 03 beginning of list (and 00 is used for "end of list") + +Each of the three types is immediately followed by a k-byte integer +indicating the size (in bytes) of the following representation. Here +k is an integer that depends on the implementation, it might be +anywhere from 2 to 8, but would be fixed for a given implementation; +it determines the size of the objects that can be handled. The transport +and canonical representations are independent of the choice of k made by +the implementation. + +Although the lengths of lists are not given in the usual S-expression +notations, it is easy to fill them in when parsing; when you reach a +right-parenthesis you know how long the list representation was, and +where to go back to fill in the missing length. + + +8.2.1 Octet string + +This is represented as follows: + + 01 <length> <octet-string> + +For example (here k = 2) + + 01 0003 a b c + +8.2.2 Octet-string with display-hint + +This is represented as follows: + + 02 <length> + 01 <length> <octet-string> /* for display-type */ + 01 <length> <octet-string> /* for octet-string */ + +For example, the S-expression + + [gif] #61626364# + +would be represented as (with k = 2) + + 02 000d + 01 0003 g i f + 01 0004 61 62 63 64 + +8.2.3 List + +This is represented as + + 03 <length> <item1> <item2> <item3> ... <itemn> 00 + +For example, the list (abc [d]ef (g)) is represented in memory as (with k=2) + + 03 001b + 01 0003 a b c + 02 0009 + 01 0001 d + 01 0002 e f + 03 0005 + 01 0001 g + 00 + 00 + +9. Code + +There is code available for reading and parsing the various +S-expression formats proposed here. + +See http://theory.lcs.mit.edu/~rivest/sexp.html + + +10. Utilization of S-expressions + +This note has described S-expressions in general form. Application writers +may wish to restrict their use of S-expressions in various ways. 
Here are +some possible restrictions that might be considered: + + -- no display-hints + -- no lengths on hexadecimal, quoted-strings, or base-64 encodings + -- no empty lists + -- no empty octet-strings + -- no lists having another list as its first element + -- no base-64 or hexadecimal encodings + -- fixed limits on the size of octet-strings + +11. Historical note + +The S-expression technology described here was originally developed +for ``SDSI'' (the Simple Distributed Security Infrastructure by +Lampson and Rivest [SDSI]) in 1996, although the origins clearly date +back to McCarthy's LISP programming language. It was further refined +and improved during the merger of SDSI and SPKI [SPKI] during the +first half of 1997. S-expressions are similar to, but more readable +and flexible than, Bernstein's "net-strings" [BERN]. + +12. References + +[SDSI] "A Simple Distributed Security Architecture", by + Butler Lampson, and Ronald L. Rivest + http://theory.lcs.mit.edu/~cis/sdsi.html + +[SPKI] SPKI--A + Simple Public Key Infrastructure + +[BERN] Dan Bernstein's "net-strings"; Internet Draft + draft-bernstein-netstrings-02.txt + +Author's Address + + Ronald L. 
Rivest + Room 324, 545 Technology Square + MIT Laboratory for Computer Science + Cambridge, MA 02139 + + rivest@theory.lcs.mit.edu + + diff --git a/cmsg_private.h b/cmsg_private.h index f33dd1f..7add267 100644 --- a/cmsg_private.h +++ b/cmsg_private.h @@ -3,10 +3,14 @@ typedef struct cmsg_bytes_t { size_t len; - void *bytes; + unsigned char *bytes; } cmsg_bytes_t; -#define EMPTY_BYTES ((cmsg_bytes_t) { .len = 0, .bytes = NULL }) +#define CMSG_BYTES(length, bytes_ptr) ((cmsg_bytes_t) { \ + .len = (length), \ + .bytes = (unsigned char *) (bytes_ptr) \ + }) +#define EMPTY_BYTES CMSG_BYTES(0, NULL) extern cmsg_bytes_t cmsg_cstring_bytes(char const *cstr); extern cmsg_bytes_t cmsg_bytes_malloc_dup(cmsg_bytes_t src); @@ -17,7 +21,7 @@ extern void cmsg_bytes_free(cmsg_bytes_t bytes); #define BCHECK(result, message) do { if ((result) == 0) { perror(message); exit(2); } } while (0) #define PCHECK(result, message) do { if ((result) == NULL) { perror(message); exit(2); } } while (0) -extern void die(char const *format, ...); +extern __attribute__((noreturn)) void die(char const *format, ...); extern void warn(char const *format, ...); extern void info(char const *format, ...); diff --git a/harness.c b/harness.c index efa44df..0f9b691 100644 --- a/harness.c +++ b/harness.c @@ -186,10 +186,7 @@ cmsg_bytes_t iohandle_readwait(IOHandle *h, size_t at_least) { block_on_io(h, EV_READ); ICHECK(bufferevent_disable(h->io, EV_READ), "bufferevent_disable"); } - return (cmsg_bytes_t) { - .len = EVBUFFER_LENGTH(h->io->input), - .bytes = EVBUFFER_DATA(h->io->input) - }; + return CMSG_BYTES(EVBUFFER_LENGTH(h->io->input), EVBUFFER_DATA(h->io->input)); } void iohandle_drain(IOHandle *h, size_t count) { diff --git a/harness.h b/harness.h index f03359c..7f30309 100644 --- a/harness.h +++ b/harness.h @@ -21,8 +21,8 @@ typedef struct IOHandle { Process *p; int fd; struct bufferevent *io; - short error_direction; - short error_kind; + unsigned short error_direction; + unsigned short error_kind; } 
IOHandle; extern Process *current_process; diff --git a/hashtable.c b/hashtable.c new file mode 100644 index 0000000..813024c --- /dev/null +++ b/hashtable.c @@ -0,0 +1,137 @@ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> + +#include <assert.h> + +#include "cmsg_private.h" +#include "hashtable.h" + +uint32_t hash_bytes(cmsg_bytes_t bytes) { + /* http://en.wikipedia.org/wiki/Jenkins_hash_function */ + uint32_t hash = 0; + size_t i; + + for (i = 0; i < bytes.len; i++) { + hash += bytes.bytes[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +void init_hashtable(hashtable_t *table, + size_t initial_bucket_count, + void *(*dup_value)(void *), + void (*free_value)(void *)) +{ + table->bucket_count = initial_bucket_count; + table->entry_count = 0; + table->buckets = NULL; + table->dup_value = dup_value; + table->free_value = free_value; + + if (initial_bucket_count > 0) { + /* Every bucket must start out NULL: lookups walk each chain until NULL. */ + table->buckets = calloc(initial_bucket_count, sizeof(hashtable_entry_t *)); + PCHECK(table->buckets, "calloc hashtable buckets"); + } +} + +static void destroy_entry(hashtable_t *table, hashtable_entry_t *entry) { + cmsg_bytes_free(entry->key); + if (table->free_value != NULL) { + table->free_value(entry->value); + } + free(entry); +} + +void destroy_hashtable(hashtable_t *table) { + if (table->buckets != NULL) { + size_t i; + for (i = 0; i < table->bucket_count; i++) { + hashtable_entry_t *chain = table->buckets[i]; + table->buckets[i] = NULL; + while (chain != NULL) { + hashtable_entry_t *next = chain->next; + destroy_entry(table, chain); + chain = next; + } + } + free(table->buckets); + } +} + +static hashtable_entry_t **hashtable_find(hashtable_t *table, cmsg_bytes_t key) { + uint32_t h = hash_bytes(key) % table->bucket_count; + hashtable_entry_t **entryptr = &(table->buckets[h]); + hashtable_entry_t *entry = *entryptr; + while (entry != NULL) { + if ((entry->key.len == key.len) && !memcmp(entry->key.bytes, key.bytes, key.len)) { + break; + } + entryptr = 
&entry->next; + entry = *entryptr; + } + return entryptr; +} + +int hashtable_get(hashtable_t *table, cmsg_bytes_t key, void **valueptr) { + hashtable_entry_t **entryptr = hashtable_find(table, key); + if (*entryptr == NULL) { + return 0; + } else { + *valueptr = (*entryptr)->value; + return 1; + } +} + +int hashtable_put(hashtable_t *table, cmsg_bytes_t key, void *value) { + /* TODO: grow and rehash */ + hashtable_entry_t **entryptr = hashtable_find(table, key); + if (*entryptr == NULL) { + hashtable_entry_t *entry = malloc(sizeof(hashtable_entry_t)); + entry->next = NULL; + entry->key = cmsg_bytes_malloc_dup(key); + entry->value = (table->dup_value == NULL) ? value : table->dup_value(value); + *entryptr = entry; + table->entry_count++; + return 1; + } else { + void *old_value = (*entryptr)->value; + /* Install the new value before releasing the old one, in case they alias. */ + (*entryptr)->value = (table->dup_value == NULL) ? value : table->dup_value(value); + if (table->free_value != NULL) { table->free_value(old_value); } + return 0; + } +} + +int hashtable_erase(hashtable_t *table, cmsg_bytes_t key) { + hashtable_entry_t **entryptr = hashtable_find(table, key); + if (*entryptr == NULL) { + return 0; + } else { + hashtable_entry_t *entry = *entryptr; + *entryptr = entry->next; + destroy_entry(table, entry); + table->entry_count--; + return 1; + } +} + +void hashtable_foreach(hashtable_t *table, + hashtable_iterator_t iterator, + void *context) +{ + size_t i; + for (i = 0; i < table->bucket_count; i++) { + hashtable_entry_t *chain; + for (chain = table->buckets[i]; chain != NULL; chain = chain->next) { + iterator(context, chain->key, chain->value); + } + } +} diff --git a/hashtable.h b/hashtable.h new file mode 100644 index 0000000..8d36774 --- /dev/null +++ b/hashtable.h @@ -0,0 +1,35 @@ +#ifndef cmsg_hashtable_h +#define cmsg_hashtable_h + +typedef struct hashtable_entry_t_ { + struct hashtable_entry_t_ *next; + cmsg_bytes_t key; + void *value; +} hashtable_entry_t; + +typedef struct hashtable_t_ { + size_t bucket_count; + size_t entry_count; + hashtable_entry_t 
**buckets; + void *(*dup_value)(void *); + void (*free_value)(void *); +} hashtable_t; + +typedef void (*hashtable_iterator_t)(void *context, cmsg_bytes_t key, void *value); + +extern uint32_t hash_bytes(cmsg_bytes_t bytes); + +extern void init_hashtable(hashtable_t *table, + size_t initial_bucket_count, + void *(*dup_value)(void *), + void (*free_value)(void *)); +extern void destroy_hashtable(hashtable_t *table); + +extern int hashtable_get(hashtable_t *table, cmsg_bytes_t key, void **valueptr); +extern int hashtable_put(hashtable_t *table, cmsg_bytes_t key, void *value); +extern int hashtable_erase(hashtable_t *table, cmsg_bytes_t key); +extern void hashtable_foreach(hashtable_t *table, + hashtable_iterator_t iterator, + void *context); + +#endif diff --git a/net.c b/net.c index 8a7ab6a..245c2ca 100644 --- a/net.c +++ b/net.c @@ -41,7 +41,7 @@ void get_addr_name(char *namebuf, size_t buflen, struct sockaddr_in const *sin) void endpoint_name(struct sockaddr_in const *peername, cmsg_bytes_t result) { char name[256]; get_addr_name(name, sizeof(name), peername); - snprintf(result.bytes, result.len, "%s:%d", name, ntohs(peername->sin_port)); + snprintf((char *) result.bytes, result.len, "%s:%d", name, ntohs(peername->sin_port)); } static void accept_connection(int servfd, short what, void *arg) { diff --git a/node.h b/node.h index f917c4a..6c42456 100644 --- a/node.h +++ b/node.h @@ -1,14 +1,16 @@ #ifndef cmsg_node_h #define cmsg_node_h -typedef struct Node { - struct NodeClass *node_class; +typedef struct node_t_ { + struct node_class_t_ *node_class; cmsg_bytes_t name; /* used as (partial) routing key for metamessages */ -} Node; +} node_t; -typedef struct NodeClass { - void (*destroy)(Node *n); - void (*handle_message)(Node *n, void *buffer, size_t len); -} NodeClass; +typedef struct node_class_t_ { + void (*destroy)(node_t *n); + void (*handle_message)(node_t *n, msg_t *m); +} node_class_t; + +extern node_t *new_node( #endif diff --git a/ref.h b/ref.h new file 
mode 100644
index 0000000..d84c811
--- /dev/null
+++ b/ref.h
@@ -0,0 +1,59 @@
+#ifndef cmsg_ref_h
+#define cmsg_ref_h
+
+/* Intrusive reference counting for structs that embed a member
+ * "refcount_t refcount".
+ *
+ * Each macro takes a pointer to such a struct, evaluates it once and
+ * accepts NULL, in which case no count is touched. They are written
+ * with the GNU C extensions typeof and statement expressions.
+ */
+
+#include <assert.h> /* UNGRAB expands to a use of assert() */
+
+typedef struct refcount_t_ {
+  unsigned int count;
+} refcount_t;
+
+/* The count of a freshly created object: nothing holds it yet. */
+#define ZERO_REFCOUNT() ((refcount_t) { .count = 0 })
+
+/* Takes a reference. Evaluates to x, so it can wrap an expression. */
+#define INCREF(x) ({ \
+      typeof(x) __x = (x); \
+      if (__x != NULL) { \
+        __x->refcount.count++; \
+      } \
+      __x; \
+    })
+
+/* Drops a reference but never destroys the object, even when the count
+ * reaches zero; asserts that the count was nonzero. Evaluates to x. */
+#define UNGRAB(x) ({ \
+      typeof(x) __x = (x); \
+      if (__x != NULL) { \
+        assert(__x->refcount.count); \
+        __x->refcount.count--; \
+      } \
+      __x; \
+    })
+
+/* Drops a reference and calls dtor(x) once the count reaches zero.
+ * Evaluates to a null pointer of x's type, so "p = DECREF(p, dtor)"
+ * also clears p.
+ *
+ * The object must be held when this is used: the count is unsigned
+ * and is not checked, so DECREF on a count of zero wraps around
+ * instead of destroying the object. */
+#define DECREF(x, dtor) ({ \
+      typeof(x) __x = (x); \
+      if (__x != NULL) { \
+        (__x->refcount.count)--; \
+        if (__x->refcount.count == 0) { \
+          (dtor)(__x); \
+        } \
+      } \
+      (typeof(__x)) 0; \
+    })
+
+#endif
diff --git a/relay.c b/relay.c
index 984e272..4ac3de0 100644
--- a/relay.c
+++ b/relay.c
@@ -22,6 +22,9 @@ typedef unsigned char u_char;
 #include "harness.h"
 #include "relay.h"
 #include "net.h"
+#include "ref.h"
+#include "sexp.h"
+#include "sexpio.h"
 
 struct boot_args {
   struct sockaddr_in peername;
@@ -30,10 +33,11 @@ struct boot_args {
 
 static void relay_main(struct boot_args *args) {
   IOHandle *h = new_iohandle(args->fd);
+  IOHandle *out = new_iohandle(1);
 
   {
     char name[256];
-    endpoint_name(&args->peername, (cmsg_bytes_t) { .bytes = name, .len = sizeof(name) });
+    endpoint_name(&args->peername, CMSG_BYTES(sizeof(name), name));
     info("Accepted connection from %s on fd %d\n", name, args->fd);
   }
 
@@ -43,31 +47,37 @@ static void relay_main(struct boot_args *args) {
   ICHECK(iohandle_flush(h), "iohandle_flush 1");
   nap(1000);
   iohandle_write(h, cmsg_cstring_bytes("Proceed\n"));
-  iohandle_settimeout(h, 3, 0);
-  while (1) {
-    cmsg_bytes_t buf = iohandle_readwait(h, 1);
-    if (buf.len == 0) {
-      switch (h->error_kind) {
-        case EVBUFFER_TIMEOUT:
-          info("Timeout\n");
-          iohandle_clear_error(h);
-          iohandle_write(h, cmsg_cstring_bytes("Timed out\n"));
-          break;
-        default:
-          info("Error! 0x%04X\n", h->error_kind);
-          break;
-      }
-      break;
-    } else {
-      info("Read %d: %.*s\n", buf.len, buf.len, buf.bytes);
-      iohandle_drain(h, buf.len);
+  //iohandle_settimeout(h, 3, 0);
+
+ loop:
+  {
+    sexp_t *x = INCREF(sexp_read(h)); /* the result arrives unheld; hold it so the DECREF below can destroy it */
+    switch (h->error_kind) {
+      case 0:
+        fflush(NULL);
+        sexp_write(out, x);
+        iohandle_write(out, cmsg_cstring_bytes("\n"));
+        ICHECK(iohandle_flush(out), "iohandle_flush out");
+        DECREF(x, sexp_destructor);
+        iohandle_write(h, cmsg_cstring_bytes("OK, proceed\n"));
+        goto loop;
+
+      case EVBUFFER_TIMEOUT:
+        info("Timeout\n");
+        iohandle_clear_error(h);
+        iohandle_write(h, cmsg_cstring_bytes("Timed out\n"));
+        ICHECK(iohandle_flush(h), "iohandle_flush 2");
+        break;
+
+      default:
+        info("Error! 0x%04X\n", h->error_kind);
+        break;
     }
-    iohandle_write(h, cmsg_cstring_bytes("OK, proceed\n"));
   }
-  ICHECK(iohandle_flush(h), "iohandle_flush 2");
 
   ICHECK(close(h->fd), "close");
   delete_iohandle(h);
+  delete_iohandle(out);
 }
 
 void start_relay(struct sockaddr_in const *peername, int fd) {
diff --git a/sexp.c b/sexp.c
new file mode 100644
index 0000000..bc28820
--- /dev/null
+++ b/sexp.c
@@ -0,0 +1,156 @@
+/* NOTE(review): the system header names were missing from this copy of
+   the patch; the ones below are reconstructed from what the file calls
+   (malloc/free, assert) - confirm against the original. */
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <assert.h>
+
+#include "cmsg_private.h"
+#include "ref.h"
+#include "sexp.h"
+
+/* Recycled shells, chained through data.pair.tail. */
+static sexp_t *freelist = NULL;
+
+/* Returns a shell of the given kind with a zero refcount, reusing a
+   recycled shell when one is available. The data union is left for
+   the caller to fill in. */
+static inline sexp_t *alloc_shell(sexp_type_t kind) {
+  sexp_t *x = freelist;
+  if (x == NULL) {
+    x = malloc(sizeof(*x));
+    if (x == NULL) {
+      die("Out of memory allocating sexp\n");
+    }
+  } else {
+    freelist = x->data.pair.tail;
+  }
+  x->refcount = ZERO_REFCOUNT();
+  x->kind = kind;
+  return x;
+}
+
+/* Puts a shell back on the freelist. Does not release what it refers to. */
+static inline void release_shell(sexp_t *x) {
+  x->data.pair.tail = freelist;
+  freelist = x;
+}
+
+/* Destructor for sexp_data_t (for use with DECREF): frees the bytes,
+   then the header itself. */
+void sexp_data_destructor(sexp_data_t *data) {
+  cmsg_bytes_free(data->data);
+  free(data);
+}
+
+/* Destructor for sexp_t (for use with DECREF): drops the references x
+   holds and recycles its shell. x must not be NULL. */
+void sexp_destructor(sexp_t *x) {
+ tail_recursion:
+  switch (x->kind) {
+    case SEXP_BYTES:
+      cmsg_bytes_free(x->data.bytes);
+      break;
+    case SEXP_SLICE:
+      DECREF(x->data.slice.data, sexp_data_destructor);
+      break;
+    case SEXP_DISPLAY_HINT:
+    case SEXP_PAIR: {
+      sexp_t *next = x->data.pair.tail;
+      DECREF(x->data.pair.head, sexp_destructor);
+      if (next != NULL) {
+        if (next->refcount.count == 1) {
+          /* x holds the only reference to its tail, so the tail is
+             destroyed as well: loop rather than recurse. */
+          release_shell(x);
+          x = next;
+          goto tail_recursion;
+        } else {
+          DECREF(next, sexp_destructor);
+        }
+      }
+      break;
+    }
+    default:
+      die("Unknown sexp kind %d in dtor\n", x->kind);
+  }
+  release_shell(x);
+}
+
+/* Copies body[offset, offset + length) and wraps the copy in a new,
+   unheld sexp_data_t. */
+sexp_data_t *sexp_data_copy(cmsg_bytes_t body, size_t offset, size_t length) {
+  /* Written so that offset + length cannot wrap around. */
+  assert(offset <= body.len && length <= body.len - offset);
+  return sexp_data_alias(cmsg_bytes_malloc_dup(CMSG_BYTES(length, body.bytes + offset)));
+}
+
+/* Wraps body itself, without copying, in a new, unheld sexp_data_t.
+   sexp_data_destructor later hands body to cmsg_bytes_free. */
+sexp_data_t *sexp_data_alias(cmsg_bytes_t body) {
+  sexp_data_t *data = malloc(sizeof(*data));
+  if (data == NULL) {
+    die("Out of memory allocating sexp data\n");
+  }
+  data->refcount = ZERO_REFCOUNT();
+  data->data = body;
+  return data;
+}
+
+/* New, unheld SEXP_BYTES holding its own copy of bytes. */
+sexp_t *sexp_bytes(cmsg_bytes_t bytes) {
+  sexp_t *x = alloc_shell(SEXP_BYTES);
+  x->data.bytes = cmsg_bytes_malloc_dup(bytes);
+  return x;
+}
+
+/* New, unheld SEXP_SLICE covering length bytes of data from offset;
+   takes a reference to data. The range is not checked here. */
+sexp_t *sexp_slice(sexp_data_t *data, size_t offset, size_t length) {
+  sexp_t *x = alloc_shell(SEXP_SLICE);
+  x->data.slice.data = INCREF(data);
+  x->data.slice.offset = offset;
+  x->data.slice.length = length;
+  return x;
+}
+
+/* New, unheld display-hinted string. hint and body must both be simple
+   strings (asserted); a reference is taken to each. */
+sexp_t *sexp_display_hint(sexp_t *hint, sexp_t *body) {
+  sexp_t *x = alloc_shell(SEXP_DISPLAY_HINT);
+  assert(sexp_simple_stringp(hint));
+  assert(sexp_simple_stringp(body));
+  x->data.pair.head = INCREF(hint);
+  x->data.pair.tail = INCREF(body);
+  return x;
+}
+
+/* New, unheld pair; takes a reference to head and to tail, either of
+   which may be NULL. */
+sexp_t *sexp_cons(sexp_t *head, sexp_t *tail) {
+  sexp_t *x = alloc_shell(SEXP_PAIR);
+  x->data.pair.head = INCREF(head);
+  x->data.pair.tail = INCREF(tail);
+  return x;
+}
+
+/* Returns the bytes of a string sexp without copying them; for a
+   display-hinted string these are the bytes of its body. Any other
+   kind is reported through die. */
+cmsg_bytes_t sexp_data(sexp_t *x) {
+ restart:
+  switch (x->kind) {
+    case SEXP_BYTES:
+      return x->data.bytes;
+    case SEXP_SLICE:
+      return CMSG_BYTES(x->data.slice.length,
+                        x->data.slice.data->data.bytes + x->data.slice.offset);
+    case SEXP_DISPLAY_HINT:
+      x = x->data.pair.tail;
+      goto restart;
+    default:
+      die("Unknown sexp kind %d in data accessor\n", x->kind);
+  }
+}
diff --git a/sexp.h b/sexp.h
new file mode 100644
index 0000000..76905d5
--- /dev/null
+++ b/sexp.h
@@ -0,0 +1,123 @@
+#ifndef cmsg_sexp_h
+#define cmsg_sexp_h
+
+/* In-memory S-expressions.
+ *
+ * This header includes nothing itself: refcount_t and the reference
+ * macros (ref.h), cmsg_bytes_t, size_t and assert must already be in
+ * scope wherever it is included or its macros are used.
+ */
+
+/* A reference-counted byte buffer that SEXP_SLICE values refer to. */
+typedef struct sexp_data_t_ {
+  refcount_t refcount;
+  cmsg_bytes_t data;
+} sexp_data_t;
+
+typedef enum sexp_type_t_ {
+  SEXP_BYTES,        /* string owning its bytes (data.bytes) */
+  SEXP_SLICE,        /* string viewing part of a sexp_data_t (data.slice) */
+  SEXP_DISPLAY_HINT, /* hint in data.pair.head, body in data.pair.tail */
+  SEXP_PAIR          /* list cell (data.pair) */
+} sexp_type_t;
+
+typedef struct sexp_t_ {
+  refcount_t refcount;
+  sexp_type_t kind;
+  union {
+    cmsg_bytes_t bytes;
+    struct {
+      sexp_data_t *data;
+      size_t offset;
+      size_t length;
+    } slice;
+    struct {
+      struct sexp_t_ *head;
+      struct sexp_t_ *tail;
+    } pair; /* and display-hint */
+  } data;
+} sexp_t;
+
+extern void sexp_data_destructor(sexp_data_t *data);
+extern void sexp_destructor(sexp_t *x);
+
+extern sexp_data_t *sexp_data_copy(cmsg_bytes_t body, size_t offset, size_t length);
+extern sexp_data_t *sexp_data_alias(cmsg_bytes_t body);
+
+extern sexp_t *sexp_bytes(cmsg_bytes_t bytes);
+extern sexp_t *sexp_slice(sexp_data_t *data, size_t offset, size_t length);
+extern sexp_t *sexp_display_hint(sexp_t *hint, sexp_t *body);
+extern sexp_t *sexp_cons(sexp_t *head, sexp_t *tail);
+
+/* True for a non-NULL SEXP_BYTES or SEXP_SLICE. */
+#define sexp_simple_stringp(x) ({ \
+      sexp_t *__x = (x); \
+      (__x != NULL) && ((__x->kind == SEXP_BYTES) || (__x->kind == SEXP_SLICE)); \
+    })
+
+/* True for a simple string or a display-hinted string. Uses its own
+   local name and tests the kinds directly: nesting sexp_simple_stringp
+   here would expand to "sexp_t *__x = (__x);". */
+#define sexp_stringp(x) ({ \
+      sexp_t *__s = (x); \
+      (__s != NULL) && ((__s->kind == SEXP_BYTES) \
+                        || (__s->kind == SEXP_SLICE) \
+                        || (__s->kind == SEXP_DISPLAY_HINT)); \
+    })
+
+/* True for a non-NULL SEXP_PAIR. */
+#define sexp_pairp(x) ({ \
+      sexp_t *__x = (x); \
+      (__x != NULL) && (__x->kind == SEXP_PAIR); \
+    })
+
+/* Accessors. sexp_data is for strings; the macros assert the kind and
+   do not accept NULL. None of them takes a reference. */
+extern cmsg_bytes_t sexp_data(sexp_t *x);
+#define sexp_head(x) ({sexp_t *__x = (x); assert(__x->kind == SEXP_PAIR); __x->data.pair.head;})
+#define sexp_tail(x) ({sexp_t *__x = (x); assert(__x->kind == SEXP_PAIR); __x->data.pair.tail;})
+#define sexp_hint(x) ({sexp_t *__x = (x); assert(__x->kind == SEXP_DISPLAY_HINT); __x->data.pair.head;})
+#define sexp_body(x) ({sexp_t *__x = (x); assert(__x->kind == SEXP_DISPLAY_HINT); __x->data.pair.tail;})
+
+/* Stores y in the given field of pair x, taking a reference to y
+   before releasing the old value. Evaluates to x. */
+#define sexp_setter_(x,y,fieldname) \
+  ({ \
+    sexp_t *__x = (x); \
+    sexp_t *__y = (y); \
+    sexp_t *__old; \
+    assert(__x->kind == SEXP_PAIR); \
+    INCREF(__y); \
+    __old = __x->data.pair.fieldname; \
+    __x->data.pair.fieldname = __y; \
+    DECREF(__old, sexp_destructor); \
+    __x; \
+  })
+
+#define sexp_sethead(x,y) sexp_setter_(x,y,head)
+#define sexp_settail(x,y) sexp_setter_(x,y,tail)
+
+/* Pushes val on the list held in the variable stackvar (which is
+   evaluated more than once). Evaluates to the new stack. */
+#define sexp_push(stackvar,val) \
+  ({ \
+    sexp_t *__oldstack = stackvar; \
+    stackvar = INCREF(sexp_cons((val), stackvar)); \
+    DECREF(__oldstack, sexp_destructor); \
+    stackvar; \
+  })
+
+/* Pops the list held in the variable stackvar (evaluated more than
+   once; must be a pair). Evaluates to the popped value with its
+   reference dropped via UNGRAB, i.e. not held on behalf of the caller. */
+#define sexp_pop(stackvar) \
+  ({ \
+    sexp_t *__nextstack = INCREF(sexp_tail(stackvar)); \
+    sexp_t *__val = INCREF(sexp_head(stackvar)); \
+    DECREF(stackvar, sexp_destructor); \
+    stackvar = __nextstack; \
+    UNGRAB(__val); \
+    __val; \
+  })
+
+#endif
diff --git a/sexpio.c b/sexpio.c
new file mode 100644
index 0000000..c06d21b
--- /dev/null
+++ b/sexpio.c
@@ -0,0 +1,265 @@
+/* NOTE(review): the system header names were missing from this copy of
+   the patch; the ones below are reconstructed from what the file calls
+   (snprintf, isdigit/isspace, assert) - confirm against the original,
+   which had one more system include than is listed here. */
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <assert.h>
+
+#include "cmsg_private.h"
+#include "ref.h"
+#include "sexp.h"
+#include "harness.h"
+#include "sexpio.h"
+
+/* TODO: limit size of individual simple strings */
+/* TODO: limit nesting of sexps */
+
+/* Reads one simple string of the form <decimal length>:<bytes> from h
+   and returns it as a new, unheld SEXP_BYTES. buf.len is the number of
+   bytes of the length prefix the caller has already seen in h's
+   buffer without draining them; the first wait asks for one more.
+   On failure, returns NULL with h->error_kind set (either by the
+   iohandle or to SEXP_ERROR_SYNTAX / SEXP_ERROR_OVERFLOW). */
+static sexp_t *read_simple_string(IOHandle *h, cmsg_bytes_t buf) {
+  int i = 0;
+  sexp_t *result;
+
+  while (1) {
+    buf = iohandle_readwait(h, buf.len + 1);
+    if (h->error_kind) return NULL;
+    /* Don't reset i to zero: avoids scanning the beginning of the
+       number repeatedly */
+
+    while (i < buf.len) {
+      if (i > 10) {
+        /* More than ten digits of length prefix. We're unlikely to be
+           able to cope with anything that large. */
+        h->error_kind = SEXP_ERROR_OVERFLOW;
+        return NULL;
+      }
+      if (buf.bytes[i] == ':') {
+        size_t count = 0;
+        int k;
+        /* bytes[0..i) have all been checked to be digits. Accumulate
+           by hand so that a length too big for size_t is rejected
+           instead of overflowing. */
+        for (k = 0; k < i; k++) {
+          size_t digit = (size_t) (buf.bytes[k] - '0');
+          if (count > (((size_t) -1) - digit) / 10) {
+            h->error_kind = SEXP_ERROR_OVERFLOW;
+            return NULL;
+          }
+          count = count * 10 + digit;
+        }
+        iohandle_drain(h, i + 1);
+        buf = iohandle_readwait(h, count);
+        /* Without this check a failed read would be given the length
+           count below even though fewer bytes may be available. */
+        if (h->error_kind) return NULL;
+        buf.len = count;
+        result = sexp_bytes(buf);
+        iohandle_drain(h, count);
+        return result;
+      }
+
+      if (!isdigit((unsigned char) buf.bytes[i])) {
+        h->error_kind = SEXP_ERROR_SYNTAX;
+        return NULL;
+      }
+
+      i++;
+    }
+  }
+}
+
+/* Helpers for sexp_read; they rely on its locals h and buf and on its
+   error label. */
+#define CHECKH \
+  if (h->error_kind) goto error;
+
+#define READ1 \
+  buf = iohandle_readwait(h, 1); \
+  CHECKH;
+
+/* Reads one S-expression from h: a length-prefixed string, a
+   [hint]string display hint or a parenthesised list; whitespace
+   between tokens is skipped.
+   The result is not held: the caller must INCREF it to keep it. The
+   empty list "()" is returned as NULL, so failure has to be detected
+   through h->error_kind rather than through the result. On failure
+   NULL is returned and everything read so far is released;
+   h->error_kind is then nonzero. */
+sexp_t *sexp_read(IOHandle *h) {
+  cmsg_bytes_t buf;
+  sexp_t *stack = NULL; /* held */
+  sexp_t *hint = NULL; /* held */
+  sexp_t *body = NULL; /* held */
+  sexp_t *accumulator = NULL; /* not held */
+
+  while (1) {
+    READ1;
+    switch (buf.bytes[0]) {
+      case '[': {
+        iohandle_drain(h, 1);
+        hint = INCREF(read_simple_string(h, EMPTY_BYTES));
+        CHECKH;
+        READ1;
+        if (buf.bytes[0] != ']') {
+          h->error_kind = SEXP_ERROR_SYNTAX;
+          goto error;
+        }
+        iohandle_drain(h, 1);
+        body = INCREF(read_simple_string(h, EMPTY_BYTES));
+        CHECKH;
+        accumulator = sexp_display_hint(hint, body);
+        /* The display hint now holds both; drop our references and clear
+           the variables so the error path cannot release them again. */
+        hint = DECREF(hint, sexp_destructor); /* these could be UNGRABs */
+        body = DECREF(body, sexp_destructor);
+        break;
+      }
+
+      case '(':
+        iohandle_drain(h, 1);
+        sexp_push(stack, sexp_cons(NULL, NULL));
+        continue;
+
+      case ')': {
+        sexp_t *current;
+        if (stack == NULL) {
+          h->error_kind = SEXP_ERROR_SYNTAX;
+          goto error;
+        }
+        /* sexp_pop hands the list builder back unheld; hold it so that
+           the DECREF below really destroys it instead of wrapping its
+           count. */
+        current = sexp_pop(stack);
+        INCREF(current);
+        iohandle_drain(h, 1);
+        accumulator = INCREF(sexp_head(current));
+        DECREF(current, sexp_destructor);
+        UNGRAB(accumulator);
+        break;
+      }
+
+      default:
+        if (isspace((unsigned char) buf.bytes[0])) {
+          iohandle_drain(h, 1);
+          continue;
+        }
+        buf.len = 1; /* needed to avoid reading too much in read_simple_string */
+        accumulator = read_simple_string(h, buf);
+        if (h->error_kind) goto error;
+        break;
+    }
+
+    if (stack == NULL) {
+      return accumulator;
+    } else {
+      /* The head of the stack is a builder pair for the list being read:
+         its head is the first cell so far, its tail the last one. */
+      sexp_t *current = sexp_head(stack); /* not held */
+      sexp_t *cell = sexp_cons(accumulator, NULL);
+      if (sexp_tail(current) == NULL) {
+        sexp_sethead(current, cell);
+      } else {
+        sexp_settail(sexp_tail(current), cell);
+      }
+      sexp_settail(current, cell);
+    }
+  }
+
+ error:
+  DECREF(stack, sexp_destructor);
+  DECREF(hint, sexp_destructor);
+  DECREF(body, sexp_destructor);
+  return NULL;
+}
+
+/* Writes a string sexp as <decimal length>:<bytes>. For a display hint
+   sexp_data yields the body, so only the body would be written. */
+void write_simple_string(IOHandle *h, sexp_t *x) {
+  cmsg_bytes_t data = sexp_data(x);
+  char lenstr[16];
+  snprintf(lenstr, sizeof(lenstr), "%u:", (unsigned int) data.len);
+  lenstr[sizeof(lenstr) - 1] = '\0';
+  iohandle_write(h, cmsg_cstring_bytes(lenstr));
+  iohandle_write(h, data);
+}
+
+/* Writes x to h in the form sexp_read accepts; NULL is written as
+   "()". x may be unheld: it is held for the duration of the walk and
+   handed back with the count it arrived with.
+   Returns 0 on success, or SEXP_ERROR_SYNTAX as soon as a list turns
+   out to end in something other than NULL (what was written up to
+   that point stays written). Nothing is flushed here. */
+unsigned short sexp_write(IOHandle *h, sexp_t *x) {
+  unsigned short result = 0;
+  sexp_t *stack = NULL; /* held */
+  sexp_t *current = x;
+
+  /* Hold x while walking it: pushing a pair and then advancing past it
+     takes and drops a reference, which would destroy an unheld root in
+     the middle of the walk. */
+  INCREF(x);
+
+ write1:
+  if (current == NULL) {
+    iohandle_write(h, cmsg_cstring_bytes("()"));
+  } else {
+    switch (current->kind) {
+      case SEXP_BYTES:
+      case SEXP_SLICE:
+        write_simple_string(h, current);
+        break;
+
+      case SEXP_DISPLAY_HINT:
+        iohandle_write(h, cmsg_cstring_bytes("["));
+        write_simple_string(h, sexp_hint(current));
+        iohandle_write(h, cmsg_cstring_bytes("]"));
+        write_simple_string(h, sexp_body(current));
+        break;
+
+      case SEXP_PAIR:
+        iohandle_write(h, cmsg_cstring_bytes("("));
+        sexp_push(stack, current);
+        break;
+
+      default:
+        die("Unknown sexp kind %d in sexp_write\n", current->kind);
+    }
+  }
+
+ check_stack:
+  if (stack == NULL) {
+    goto done;
+  }
+
+  {
+    sexp_t *cell = sexp_head(stack);
+    if (cell == NULL) {
+      iohandle_write(h, cmsg_cstring_bytes(")"));
+      sexp_pop(stack); /* no need to worry about incref/decref: it's NULL! */
+      goto check_stack;
+    }
+
+    if (sexp_pairp(cell)) {
+      current = sexp_head(cell);
+      sexp_sethead(stack, sexp_tail(cell));
+      goto write1;
+    }
+
+    /* Improper list: release the traversal stack rather than leaking
+       it. */
+    result = SEXP_ERROR_SYNTAX;
+    stack = DECREF(stack, sexp_destructor);
+  }
+
+ done:
+  UNGRAB(x);
+  return result;
+}
diff --git a/sexpio.h b/sexpio.h
new file mode 100644
index 0000000..256b88a
--- /dev/null
+++ b/sexpio.h
@@ -0,0 +1,16 @@
+#ifndef cmsg_sexpio_h
+#define cmsg_sexpio_h
+
+/* Error codes: sexp_read stores them in the IOHandle's error_kind,
+   and sexp_write returns SEXP_ERROR_SYNTAX. */
+#define SEXP_ERROR_OVERFLOW 0x8000
+#define SEXP_ERROR_SYNTAX 0x8001
+
+/* Reads one S-expression from h. NULL is returned both for "()" and
+   on failure, so check h->error_kind to tell the two apart. */
+extern sexp_t *sexp_read(IOHandle *h);
+/* Writes x (NULL is written as "()") to h. Returns 0, or
+   SEXP_ERROR_SYNTAX for a list that does not end in NULL. */
+extern unsigned short sexp_write(IOHandle *h, sexp_t *x);
+
+#endif