commit a11fdc05005d38f4902972117d337555874cfddf Author: Tony Garnock-Jones Date: Sun Jan 8 12:41:04 2012 -0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e63542 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +scratch/ +_build/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8745497 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +APP=ocamlmsg + +all: message.ml $(APP).native + +message.ml: messages.json codegen.py + python codegen.py > $@ + +clean: + ocamlbuild -clean + rm -f message.ml + +$(APP).native: $(wildcard *.ml) + ocamlbuild $@ + +run: all + ./$(APP).native diff --git a/_tags b/_tags new file mode 100644 index 0000000..809c5b8 --- /dev/null +++ b/_tags @@ -0,0 +1,2 @@ +true: use_unix +true: thread diff --git a/codegen.py b/codegen.py new file mode 100644 index 0000000..f3499d0 --- /dev/null +++ b/codegen.py @@ -0,0 +1,88 @@ +from __future__ import with_statement + +# Copyright (C) 2012 Tony Garnock-Jones. All rights reserved. +copyright_stmt = '(* Copyright (C) 2012 Tony Garnock-Jones. All rights reserved. *)' + +import sys +import json + +def mlify(s): + s = s.replace('-', '_') + s = s.replace(' ', '_') + return s + +class MessageType: + def __init__(self, j): + self.wire_selector = j['selector'] + self.selector = mlify(self.wire_selector) + self.constructor = self.selector.capitalize() + self.wire_argnames = j['args'] + self.argnames = map(mlify, self.wire_argnames) + + def format_args(self, template, separator = ', '): + return separator.join([template % (x,) for x in self.argnames]) + +with file("messages.json") as f: + spec = map(MessageType, json.load(f)) + +def print_list(o, xs, sep, c): + sys.stdout.write(o) + needsep = False + for x in xs: + if needsep: + sys.stdout.write(sep) + else: + needsep = True + sys.stdout.write(x) + sys.stdout.write(c) + +def print_codec(): + print copyright_stmt + print + print 'open Sexp' + print + print 'type t =' + for t in spec: + if t.argnames: + print ' | %s of ' % (t.constructor), + print_list('(', ['Sexp.t' for n in t.argnames], ' * ', ')\n') + else: + print ' | %s' % t.constructor + print ' | UNKNOWN of Sexp.t' + print + print 'let sexp_of_message m = match m with' + for t in spec: + sys.stdout.write(' | %s' % t.constructor) + if t.argnames: + print_list(' (', [n for n in t.argnames], ', ', ')') + print ' ->' + sys.stdout.write(' Arr [Str "%s"' % t.wire_selector) + if t.argnames: + print_list('; ', t.argnames, '; ', '') + print ']' + print ' | UNKNOWN s -> s' + print + print 'let message_of_sexp s = match s with' + for t in spec: + sys.stdout.write(' | Arr [Str "%s"' % t.wire_selector) + if t.argnames: + print_list('; ', t.argnames, '; ', '') + print '] ->' + sys.stdout.write(' %s' % t.constructor) + if t.argnames: + print_list(' (', [n for n in t.argnames], ', ', ')') + print + print ' | _ -> UNKNOWN s' + print + for t in spec: + sys.stdout.write('let %s' % t.selector) + if t.argnames: + print_list(' (', t.argnames, ', ', ')') + sys.stdout.write(' = sexp_of_message (%s' % t.constructor) + if t.argnames: + print_list(' (', t.argnames, ', ', ')') + sys.stdout.write(')\n') + print + +if __name__ == '__main__': + print_codec() diff --git a/datastructures.ml b/datastructures.ml new file mode 100644 index 0000000..be92e2a --- /dev/null +++ b/datastructures.ml @@ -0,0 +1,2 @@ +module StringSet = Set.Make(String) +module StringMap = Map.Make(String) diff --git a/doc/Sexp.txt b/doc/Sexp.txt new file mode 100644 index 0000000..caf4542 --- /dev/null +++ b/doc/Sexp.txt @@ -0,0 +1,699 @@ +Network Working Group R. Rivest +Internet Draft May 4, 1997 +Expires November 4, 1997 + + + S-Expressions + draft-rivest-sexp-00.txt + + +Status of this Memo + + Distribution of this memo is unlimited. + + This document is an Internet-Draft. Internet Drafts are working + documents of the Internet Engineering Task Force (IETF), its Areas, + and its Working Groups. Note that other groups may also distribute + working documents as Internet Drafts. + + Internet Drafts are draft documents valid for a maximum of six + months, and may be updated, replaced, or obsoleted by other documents + at any time. It is not appropriate to use Internet Drafts as + reference material, or to cite them other than as a ``working draft'' + or ``work in progress.'' + + To learn the current status of any Internet-Draft, please check the + ``1id-abstracts.txt'' listing contained in the internet-drafts Shadow + Directories on: ftp.is.co.za (Africa), nic.nordu.net (Europe), + ds.internic.net (US East Coast), ftp.isi.edu (US West Coast), + or munnari.oz.au (Pacific Rim) + + +Abstract + +This memo describes a data structure called "S-expressions" that are +suitable for representing arbitrary complex data structures. We make +precise the encodings of S-expressions: we give a "canonical form" for +S-expressions, described two "transport" representations, and also +describe an "advanced" format for display to people. + + + +1. Introduction + +S-expressions are data structures for representing complex data. They +are either byte-strings ("octet-strings") or lists of simpler +S-expressions. Here is a sample S-expression: + + (snicker "abc" (#03# |YWJj|)) + +It is a list of length three: + + -- the octet-string "snicker" + + -- the octet-string "abc" + + -- a sub-list containing two elements: + - the hexadecimal constant #03# + - the base-64 constant |YWJj| (which is the same as "abc") + +This note gives a specific proposal for constructing and utilizing +S-expressions. The proposal is independent of any particular application. + +Here are the design goals for S-expressions: + + -- generality: S-expressions should be good at representing arbitrary + data. + + -- readability: it should be easy for someone to examine and + understand the structure of an S-expression. + + -- economy: S-expressions should represent data compactly. + + -- tranportability: S-expressions should be easy to transport + over communication media (such as email) that are known to be + less than perfect. + + -- flexibility: S-expressions should make it relatively simple to + modify and extend data structures. + + -- canonicalization: it should be easy to produce a unique + "canonical" form of an S-expression, for digital signature purposes. + + -- efficiency: S-expressions should admit in-memory representations + that allow efficient processing. + + +Section 2 gives an introduction to S-expressions. +Section 3 discusses the character sets used. +Section 4 presents the various representations of octet-strings. +Section 5 describes how to represent lists. +Section 6 discusses how S-expressions are represented for various uses. +Section 7 gives a BNF syntax for S-expressions. +Section 8 talks about how S-expressions might be represented in memory. +Section 9 briefly describes implementations for handling S-expressions. +Section 10 discusses how applications might utilize S-expressions. +Section 11 gives historical notes on S-expressions. +Section 12 gives references. + +2. S-expressions -- informal introduction + +Informally, an S-expression is either: + -- an octet-string, or + -- a finite list of simpler S-expressions. + +An octet-string is a finite sequence of eight-bit octets. There may be +many different but equivalent ways of representing an octet-string + + abc -- as a token + + "abc" -- as a quoted string + + #616263# -- as a hexadecimal string + + 3:abc -- as a length-prefixed "verbatim" encoding + + {MzphYmM=} -- as a base-64 encoding of the verbatim encoding + (that is, an encoding of "3:abc") + + |YWJj| -- as a base-64 encoding of the octet-string "abc" + +These encodings are all equivalent; they all denote the same octet string. + +We will give details of these encodings later on, and also describe how to +give a "display type" to a byte string. + +A list is a finite sequence of zero or more simpler S-expressions. A list +may be represented by using parentheses to surround the sequence of encodings +of its elements, as in: + + (abc (de #6667#) "ghi jkl") + +As we see, there is variability possible in the encoding of an +S-expression. In some cases, it is desirable to standardize or +restrict the encodings; in other cases it is desirable to have no +restrictions. The following are the target cases we aim to handle: + + -- a "transport" encoding for transporting the S-expression between + computers. + + -- a "canonical" encoding, used when signing the S-expression. + + -- an "advanced" encoding used for input/output to people. + + -- an "in-memory" encoding used for processing the S-expression in + the computer. + +These need not be different; in this proposal the canonical encoding +is the same as the transport encoding, for example. In this note we +propose (related) encoding techniques for each of these uses. + +3. Character set + +We will be describing encodings of S-expressions. Except when giving +"verbatim" encodings, the character set used is limited to the following +characters in US-ASCII: + Alphabetic: A B ... Z a b ... z + numeric: 0 1 ... 9 + whitespace: space, horizontal tab, vertical tab, form-feed + carriage-return, line-feed + The following graphics characters, which we call "pseudo-alphabetic": + - hyphen or minus + . period + / slash + _ underscore + : colon + * asterisk + + plus + = equal + The following graphics characters, which are "reserved punctuation": + ( left parenthesis + ) right parenthesis + [ left bracket + ] right bracket + { left brace + } right brace + | vertical bar + # number sign + " double quote + & ampersand + \ backslash + The following characters are unused and unavailable, except in + "verbatim" encodings: + ! exclamation point + % percent + ^ circumflex + ~ tilde + ; semicolon + ' apostrophe + , comma + < less than + > greater than + ? question mark + + +4. Octet string representations + +This section describes in detail the ways in which an octet-string may +be represented. + +We recall that an octet-string is any finite sequence of octets, and +that the octet-string may have length zero. + + +4.1 Verbatim representation + +A verbatim encoding of an octet string consists of four parts: + + -- the length (number of octets) of the octet-string, + given in decimal most significant digit first, with + no leading zeros. + + -- a colon ":" + + -- the octet string itself, verbatim. + +There are no blanks or whitespace separating the parts. No "escape +sequences" are interpreted in the octet string. This encoding is also +called a "binary" or "raw" encoding. + +Here are some sample verbatim encodings: + + 3:abc + 7:subject + 4::::: + 12:hello world! + 10:abcdefghij + 0: + +4.2 Quoted-string representation + +The quoted-string representation of an octet-string consists of: + + -- an optional decimal length field + + -- an initial double-quote (") + + -- the octet string with "C" escape conventions (\n,etc) + + -- a final double-quote (") + +The specified length is the length of the resulting string after any +escape sequences have been handled. The string does not have any +"terminating NULL" that C includes, and the length does not count such +a character. + +The length is optional. + +The escape conventions within the quoted string are as follows (these follow +the "C" programming language conventions, with an extension for +ignoring line terminators of just LF or CRLF): + \b -- backspace + \t -- horizontal tab + \v -- vertical tab + \n -- new-line + \f -- form-feed + \r -- carriage-return + \" -- double-quote + \' -- single-quote + \\ -- back-slash + \ooo -- character with octal value ooo (all three digits + must be present) + \xhh -- character with hexadecimal value hh (both digits + must be present) + \ -- causes carriage-return to be ignored. + \ -- causes linefeed to be ignored + \ -- causes CRLF to be ignored. + \ -- causes LFCR to be ignored. + +Here are some examples of quoted-string encodings: + + "subject" + "hi there" + 7"subject" + 3"\n\n\n" + "This has\n two lines." + "This has\ + one." + "" + +4.3 Token representation + +An octet string that meets the following conditions may be given +directly as a "token". + + -- it does not begin with a digit + + -- it contains only characters that are + -- alphabetic (upper or lower case), + -- numeric, or + -- one of the eight "pseudo-alphabetic" punctuation marks: + - . / _ : * + = + (Note: upper and lower case are not equivalent.) + (Note: A token may begin with punctuation, including ":"). + +Here are some examples of token representations: + + subject + not-before + class-of-1997 + //microsoft.com/names/smith + * + + +4.4 Hexadecimal representation + +An octet-string may be represented with a hexadecimal encoding consisting of: + + -- an (optional) decimal length of the octet string + + -- a sharp-sign "#" + + -- a hexadecimal encoding of the octet string, with each octet + represented with two hexadecimal digits, most significant + digit first. + + -- a sharp-sign "#" + +There may be whitespace inserted in the midst of the hexadecimal +encoding arbitrarily; it is ignored. It is an error to have +characters other than whitespace and hexadecimal digits. + +Here are some examples of hexadecimal encodings: + + #616263# -- represents "abc" + 3#616263# -- also represents "abc" + # 616 + 263 # -- also represents "abc" + + +4.5 Base-64 representation + +An octet-string may be represented in a base-64 coding consisting of: + + -- an (optional) decimal length of the octet string + + -- a vertical bar "|" + + -- the rfc 1521 base-64 encoding of the octet string. + + -- a final vertical bar "|" + +The base-64 encoding uses only the characters + A-Z a-z 0-9 + / = +It produces four characters of output for each three octets of input. +If the input has one or two left-over octets of input, it produces an +output block of length four ending in two or one equals signs, respectively. +Output routines compliant with this standard MUST output the equals signs +as specified. Input routines MAY accept inputs where the equals signs are +dropped. + +There may be whitespace inserted in the midst of the base-64 encoding +arbitrarily; it is ignored. It is an error to have characters other +than whitespace and base-64 characters. + +Here are some examples of base-64 encodings: + + |YWJj| -- represents "abc" + | Y W + J j | -- also represents "abc" + 3|YWJj| -- also represents "abc" + |YWJjZA==| -- represents "abcd" + |YWJjZA| -- also represents "abcd" + + +4.6 Display hint + +Any octet string may be preceded by a single "display hint". + +The purposes of the display hint is to provide information on how +to display the octet string to a user. It has no other function. +Many of the MIME types work here. + +A display-hint is an octet string surrounded by square brackets. +There may be whitespace separating the octet string from the +surrounding brackets. Any of the legal formats may be used for the +octet string. + +Here are some examples of display-hints: + + [image/gif] + [URI] + [charset=unicode-1-1] + [text/richtext] + [application/postscript] + [audio/basic] + ["http://abc.com/display-types/funky.html"] + +In applications an octet-string that is untyped may be considered to have +a pre-specified "default" mime type. The mime type + "text/plain; charset=iso-8859-1" +is the standard default. + + +4.7 Equality of octet-strings + +Two octet strings are considered to be "equal" if and only if they +have the same display hint and the same data octet strings. + +Note that octet-strings are "case-sensitive"; the octet-string "abc" +is not equal to the octet-string "ABC". + +An untyped octet-string can be compared to another octet-string (typed +or not) by considering it as a typed octet-string with the default +mime-type. + + +5. Lists + +Just as with octet-strings, there are several ways to represent an +S-expression. Whitespace may be used to separate list elements, but +they are only required to separate two octet strings when otherwise +the two octet strings might be interpreted as one, as when one token +follows another. Also, whitespace may follow the initial left +parenthesis, or precede the final right parenthesis. + +Here are some examples of encodings of lists: + + (a b c) + + ( a ( b c ) ( ( d e ) ( e f ) ) ) + + (11:certificate(6:issuer3:bob)(7:subject5:alice)) + + ({3Rt=} "1997" murphy 3:{XC++}) + + +6. Representation types + +There are three "types" of representations: + + -- canonical + + -- basic transport + + -- advanced transport + +The first two MUST be supported by any implementation; the last is +optional. + + +6.1 Canonical representation + +This canonical representation is used for digital signature purposes, +transmission, etc. It is uniquely defined for each S-expression. It +is not particularly readable, but that is not the point. It is +intended to be very easy to parse, to be reasonably economical, and to +be unique for any S-expression. + +The "canonical" form of an S-expression represents each octet-string +in verbatim mode, and represents each list with no blanks separating +elements from each other or from the surrounding parentheses. + +Here are some examples of canonical representations of S-expressions: + + (6:issuer3:bob) + + (4:icon[12:image/bitmap]9:xxxxxxxxx) + + (7:subject(3:ref5:alice6:mother)) + + +6.2 Basic transport representation + +There are two forms of the "basic transport" representation: + + -- the canonical representation + + -- an rfc-2045 base-64 representation of the canonical representation, + surrounded by braces. + +The transport mechanism is intended to provide a universal means of +representing S-expressions for transport from one machine to another. + +Here are some examples of an S-expression represented in basic +transport mode: + + (1:a1:b1:c) + + {KDE6YTE6YjE6YykA} + + (this is the same S-expression encoded in base-64) + +There is a difference between the brace notation for base-64 used here +and the || notation for base-64'd octet-strings described above. Here +the base-64 contents are converted to octets, and then re-scanned as +if they were given originally as octets. With the || notation, the +contents are just turned into an octet-string. + + +6.3 Advanced transport representation + +The "advanced transport" representation is intended to provide more +flexible and readable notations for documentation, design, debugging, +and (in some cases) user interface. + +The advanced transport representation allows all of the representation +forms described above, include quoted strings, base-64 and hexadecimal +representation of strings, tokens, representations of strings with +omitted lengths, and so on. + + +7. BNF for syntax + +We give separate BNF's for canonical and advanced forms of S-expressions. +We use the following notation: + * means 0 or more occurrences of + + means 1 or more occurrences of + ? means 0 or 1 occurrences of + parentheses are used for grouping, as in ( | )* + +For canonical and basic transport: + + :: | + :: ? ; + :: ; + :: "[" "]" ; + :: ":" ; + :: + ; + -- decimal numbers should have no unnecessary leading zeros + -- any string of bytes, of the indicated length + :: "(" * ")" ; + :: "0" | ... | "9" ; + +For advanced transport: + + :: | + :: ? ; + :: | | | | + ; + :: "[" "]" ; + :: ":" ; + :: + ; + -- decimal numbers should have no unnecessary leading zeros + -- any string of bytes, of the indicated length + :: + ; + :: ? "|" ( | )* "|" ; + :: "#" ( | )* "#" ; + :: ? + :: "\"" "\"" + :: "(" ( | )* ")" ; + :: * ; + :: | | ; + :: | | ; + :: "a" | ... | "z" ; + :: "A" | ... | "Z" ; + :: "0" | ... | "9" ; + :: | "A" | ... | "F" | "a" | ... | "f" ; + :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ; + :: " " | "\t" | "\r" | "\n" ; + :: | | "+" | "/" | "=" ; + :: "" ; + +8. In-memory representations + +For processing, the S-expression would typically be parsed and represented +in memory in a more more amenable to efficient processing. We suggest +two alternatives: + + -- "list-structure" + + -- "array-layout" + +We only sketch these here, as they are only suggestive. The code referenced +below illustrates these styles in more detail. + + +8.1. List-structure memory representation + +Here there are separate records for simple-strings, strings, and +lists. An S-expression of the form ("abc" "de") would require two +records for the simple strings, two for the strings, and two for the +list elements. This is a fairly conventional representation, and +details are omitted here. + +8.2 Array-layout memory representation + +Here each S-expression is represented as a contiguous array of bytes. +The first byte codes the "type" of the S-expression: + + 01 octet-string + + 02 octet-string with display-hint + + 03 beginning of list (and 00 is used for "end of list") + +Each of the three types is immediately followed by a k-byte integer +indicating the size (in bytes) of the following representation. Here +k is an integer that depends on the implementation, it might be +anywhere from 2 to 8, but would be fixed for a given implementation; +it determines the size of the objects that can be handled. The transport +and canonical representations are independent of the choice of k made by +the implementation. + +Although the length of lists are not given in the usual S-expression +notations, it is easy to fill them in when parsing; when you reach a +right-parenthesis you know how long the list representation was, and +where to go back to fill in the missing length. + + +8.2.1 Octet string + +This is represented as follows: + + 01 + +For example (here k = 2) + + 01 0003 a b c + +8.2.2 Octet-string with display-hint + +This is represented as follows: + + 02 + 01 /* for display-type */ + 01 /* for octet-string */ + +For example, the S-expression + + [gif] #61626364# + +would be represented as (with k = 2) + + 02 000d + 01 0003 g i f + 01 0004 61 62 63 64 + +8.2.3 List + +This is represented as + + 03 ... 00 + +For example, the list (abc [d]ef (g)) is represented in memory as (with k=2) + + 03 001b + 01 0003 a b c + 02 0009 + 01 0001 d + 01 0002 e f + 03 0005 + 01 0001 g + 00 + 00 + +9. Code + +There is code available for reading and parsing the various +S-expression formats proposed here. + +See http://theory.lcs.mit.edu/~rivest/sexp.html + + +10. Utilization of S-expressions + +This note has described S-expressions in general form. Application writers +may wish to restrict their use of S-expressions in various ways. Here are +some possible restrictions that might be considered: + + -- no display-hints + -- no lengths on hexadecimal, quoted-strings, or base-64 encodings + -- no empty lists + -- no empty octet-strings + -- no lists having another list as its first element + -- no base-64 or hexadecimal encodings + -- fixed limits on the size of octet-strings + +11. Historical note + +The S-expression technology described here was originally developed +for ``SDSI'' (the Simple Distributed Security Infrastructure by +Lampson and Rivest [SDSI]) in 1996, although the origins clearly date +back to McCarthy's LISP programming language. It was further refined +and improved during the merger of SDSI and SPKI [SPKI] during the +first half of 1997. S-expressions are similar to, but more readable +and flexible than, Bernstein's "net-strings" [BERN]. + +12. References + +[SDSI] "A Simple Distributed Security Architecture", by + Butler Lampson, and Ronald L. Rivest + http://theory.lcs.mit.edu/~cis/sdsi.html + +[SPKI] SPKI--A + Simple Public Key Infrastructure + +[BERN] Dan Bernstein's "net-strings"; Internet Draft + draft-bernstein-netstrings-02.txt + +Author's Address + + Ronald L. Rivest + Room 324, 545 Technology Square + MIT Laboratory for Computer Science + Cambridge, MA 02139 + + rivest@theory.lcs.mit.edu + + diff --git a/factory.ml b/factory.ml new file mode 100644 index 0000000..3eef79c --- /dev/null +++ b/factory.ml @@ -0,0 +1,38 @@ +open Printf +open Sexp +open Datastructures + +type factory_t = Sexp.t -> Sexp.t option + +let classes = ref StringMap.empty + +let register_class name factory = + if StringMap.mem name !classes + then (printf "ERROR: Duplicate node class name %s\n%!" name; + exit 1) + else classes := StringMap.add name factory !classes + +let lookup_class name = + try Some (StringMap.find name !classes) + with Not_found -> None + +let factory_handler n sexp = + match Message.message_of_sexp sexp with + | Message.Create (Str classname, arg, Str reply_sink, Str reply_name) -> + (match lookup_class classname with + | Some factory -> + let reply = + match factory arg with + | None -> + Message.create_ok + | Some explanation -> + Message.create_failed explanation + in + Node.post_ignore reply_sink (Str reply_name) reply (Str "") + | None -> + printf "WARNING: Node class not found <<%s>>\n%!" classname) + | m -> + Util.message_not_understood "factory" m + +let init () = + Node.bind_ignore ("factory", Node.make "factory" factory_handler) diff --git a/fqueue.ml b/fqueue.ml new file mode 100644 index 0000000..fc5153b --- /dev/null +++ b/fqueue.ml @@ -0,0 +1,86 @@ +(* Functional queue. *) + +type 'a t = Q of int * 'a list * 'a list + +let empty = Q (0, [], []) + +let singleton v = Q (1, [], [v]) + +let length (Q (n, _, _)) = n + +let is_empty q = (length q = 0) + +let push_back (Q (n, front, back)) v = Q (n + 1, front, v :: back) +let push_front (Q (n, front, back)) v = Q (n + 1, v :: front, back) + +let push_back_all (Q (n, front, back)) vs = Q (n + List.length vs, front, List.rev_append vs back) +let push_front_all (Q (n, front, back)) vs = Q (n + List.length vs, List.append vs front, back) + +let push_back_all_rev (Q (n, front, back)) vs = + Q (n + List.length vs, front, List.append vs back) +let push_front_all_rev (Q (n, front, back)) vs = + Q (n + List.length vs, List.rev_append vs front, back) + +let pop_ remote local = + match local with + | [] -> + (match List.rev remote with + | [] -> None + | v :: rest -> Some (v, [], rest)) + | v :: rest -> Some (v, remote, rest) + +let pop_back (Q (n, front, back)) = + match pop_ front back with + | Some (v, front', back') -> Some (v, Q (n - 1, front', back')) + | None -> None + +let pop_front (Q (n, front, back)) = + match pop_ back front with + | Some (v, back', front') -> Some (v, Q (n - 1, front', back')) + | None -> None + +let peek_back (Q (n, front, back)) = + match pop_ front back with + | Some (v, front', back') -> Some (v, Q (n - 1, front', v :: back')) + | None -> None + +let peek_front (Q (n, front, back)) = + match pop_ back front with + | Some (v, back', front') -> Some (v, Q (n - 1, v :: front', back')) + | None -> None + +let unsome x = + match x with + | Some v -> v + | None -> raise Not_found + +let really_pop_back q = unsome (pop_back q) +let really_pop_front q = unsome (pop_front q) + +let really_peek_back q = unsome (peek_back q) +let really_peek_front q = unsome (peek_front q) + +let slow_peek_back q = let (v, _) = unsome (peek_back q) in v +let slow_peek_front q = let (v, _) = unsome (peek_front q) in v + +let of_list vs = Q (List.length vs, vs, []) +let of_list_rev vs = Q (List.length vs, [], vs) + +let to_list (Q (_, front, back)) = List.append front (List.rev back) +let to_list_rev (Q (_, front, back)) = List.append back (List.rev front) + +(* Warning: doesn't operate in order *) +let map f (Q (n, front, back)) = Q (n, List.map f front, List.rev_map f back) + +let append (Q (n1, front1, back1)) (Q (n2, front2, back2)) = + Q (n1 + n2, front1 @ List.rev_append back1 front2, back2) + +let iter f (Q (_, front, back)) = + List.iter f front; + List.iter f (List.rev back) + +let fold_left f seed (Q (_, front, back)) = + List.fold_right (fun v s -> f s v) back (List.fold_left f seed front) + +let fold_right f seed (Q (_, front, back)) = + List.fold_right f front (List.fold_left (fun s v -> f v s) seed back) diff --git a/messages.json b/messages.json new file mode 100644 index 0000000..163aab7 --- /dev/null +++ b/messages.json @@ -0,0 +1,42 @@ +[ + { + "selector": "create", + "args": ["classname", "arg", "reply-sink", "reply-name"] + }, + { + "selector": "create-ok", + "args": [] + }, + { + "selector": "create-failed", + "args": ["reason"] + }, + { + "selector": "subscribed", + "args": ["source", "filter", "sink", "name"] + }, + { + "selector": "unsubscribed", + "args": ["source", "filter", "sink", "name"] + }, + { + "selector": "post", + "args": ["name", "body", "token"] + }, + { + "selector": "subscribe", + "args": ["filter", "sink", "name", "reply_sink", "reply_name"] + }, + { + "selector": "subscribe-ok", + "args": ["token"] + }, + { + "selector": "unsubscribe", + "args": ["token"] + }, + { + "selector": "error", + "args": ["message", "details"] + } +] diff --git a/node.ml b/node.ml new file mode 100644 index 0000000..05da942 --- /dev/null +++ b/node.ml @@ -0,0 +1,76 @@ +open Printf +open Datastructures + +type handle_message_t = t -> Sexp.t -> unit +and t = { + mutable names: StringSet.t; + class_name: string; + handle_message: handle_message_t + } + +let directory = ref StringMap.empty + +let local_container_name () = "server" + +let make class_name handler = { + names = StringSet.empty; + class_name = class_name; + handle_message = handler +} + +let lookup name = + try Some (StringMap.find name !directory) + with Not_found -> None + +let bind (filter, node) = + if filter = "" + then (printf "WARNING: Binding to empty name forbidden\n%!"; false) + else + if StringMap.mem filter !directory + then false + else (directory := StringMap.add filter node !directory; + node.names <- StringSet.add filter node.names; + printf "INFO: Binding node <<%s>> of class %s\n%!" filter node.class_name; + true) + +(* For use in factory constructor functions, hence the odd return type and values *) +let make_named class_name node_name handler = + let node = make class_name handler in + if bind (node_name, node) then None else Some (Sexp.Str "bind-failed") + +let unbind name = + match lookup name with + | Some n -> + printf "INFO: Unbinding node <<%s>> of class %s\n%!" name n.class_name; + n.names <- StringSet.remove name n.names; + directory := StringMap.remove name !directory; + true + | None -> + false + +let unbind_all n = + StringSet.iter (fun name -> ignore (unbind name)) n.names; + n.names <- StringSet.empty + +let send name body = + match lookup name with + | Some n -> n.handle_message n body; true + | None -> false + +let post name label body token = + send name (Message.post (label, body, token)) + +let bind_ignore (filter, node) = + if bind (filter, node) + then () + else printf "WARNING: Duplicate binding <<%s>>\n%!" filter + +let send_ignore name body = + if send name body + then () + else (printf "WARNING: send to missing node %s: " name; + Sexp.output_sexp Pervasives.stdout body; + print_newline ()) + +let post_ignore name label body token = + send_ignore name (Message.post (label, body, token)) diff --git a/ocamlmsg.ml b/ocamlmsg.ml new file mode 100644 index 0000000..0372446 --- /dev/null +++ b/ocamlmsg.ml @@ -0,0 +1,23 @@ +open Unix +open Printf +open Thread + +let rec accept_loop sock = + let (s, peername) = accept sock in + ignore (Relay.start_relay (s, peername)); + accept_loop sock + +let start_net port_number = + let sock = socket PF_INET SOCK_STREAM 0 in + setsockopt sock SO_REUSEADDR true; + bind sock (ADDR_INET (inet_addr_of_string "0.0.0.0", port_number)); + listen sock 5; + printf "INFO: Accepting connections on port %d.\n%!" port_number; + accept_loop sock + +let _ = + printf "ocamlmsg ALPHA, Copyright (C) 2012 Tony Garnock-Jones. All rights reserved.\n%!"; + Uuid.init (); + Factory.init (); + Queuenode.init (); + start_net 5671 diff --git a/queuenode.ml b/queuenode.ml new file mode 100644 index 0000000..e49e907 --- /dev/null +++ b/queuenode.ml @@ -0,0 +1,83 @@ +open Sexp + +type t = { + name: string; + subscriptions: Subscription.set_t; + ch: Message.t Event.channel; + mutable backlog: Sexp.t Fqueue.t; + mutable waiters: Subscription.t Fqueue.t; + } + +let classname = "queue" + +let rec do_burst info n = + (* + Printf.printf "INFO: do_burst %d backlog %d waiters %d ticks left\n%!" + (Fqueue.length info.backlog) + (Fqueue.length info.waiters) + n; + *) + if Fqueue.is_empty info.backlog then false + else + if Fqueue.is_empty info.waiters then false + else + if n = 0 then true (* maybe more work available, but should poll for outside events *) + else + let (body, new_backlog) = Fqueue.really_pop_front info.backlog in + let (sub, new_waiters) = Fqueue.really_pop_front info.waiters in + info.waiters <- new_waiters; + if Subscription.send_to_subscription info.name info.subscriptions sub body + then + (info.waiters <- Fqueue.push_back info.waiters sub; + info.backlog <- new_backlog; + do_burst info (n - 1)) + else + do_burst info n + +let rec process_and_wait info = + if not (do_burst info 1000) + then Event.sync (Event.receive info.ch) + else + match Event.poll (Event.receive info.ch) with + | Some m -> m + | None -> process_and_wait info + +let shoveller info = + let rec loop () = + match process_and_wait info with + | Message.Post (name, body, token) -> + info.backlog <- Fqueue.push_back info.backlog body; + loop () + | Message.Subscribe (filter, Str sink, name, Str reply_sink, reply_name) -> + let sub = + Subscription.create info.name info.subscriptions + filter sink name reply_sink reply_name + in + info.waiters <- Fqueue.push_back info.waiters sub; + loop () + | Message.Unsubscribe (Str token) -> + Subscription.delete info.name info.subscriptions token; + loop () + | m -> + Util.message_not_understood "queue" m; + loop () + in loop () + +let queue_factory arg = + match arg with + | (Arr [Str name]) -> + let info = { + name = name; + subscriptions = Subscription.new_set (); + ch = Event.new_channel (); + backlog = Fqueue.empty; + waiters = Fqueue.empty + } in + ignore (Util.create_thread name None shoveller info); + let queue_handler n sexp = Event.sync (Event.send info.ch (Message.message_of_sexp sexp)) in + Node.make_named classname name queue_handler + | _ -> + Some (Str "bad-arg") + +let init () = + Factory.register_class classname queue_factory diff --git a/relay.ml b/relay.ml new file mode 100644 index 0000000..8601e8b --- /dev/null +++ b/relay.ml @@ -0,0 +1,88 @@ +open Unix +open Printf +open Thread +open Sexp + +let connection_count = ref 0 + +let endpoint_name n = + match n with + | ADDR_INET (host, port) -> sprintf "%s:%d" (string_of_inet_addr host) port + | _ -> "??unknown??" + +let send_error ch message details = + let m = Message.error (Str message, details) in + print_string "WARNING: Sending error: "; + output_sexp Pervasives.stdout m; + print_newline (); + output_sexp_and_flush ch m + +let send_sexp_syntax_error ch explanation = + send_error ch explanation (Str "http://people.csail.mit.edu/rivest/Sexp.txt") + +let dispatch_message n ch m = + match m with + | Message.Post (Str name, body, token) -> + Node.send_ignore name body + | Message.Subscribe (Str filter, sink, name, Str reply_sink, Str reply_name) -> + if Node.bind(filter, n) + then Node.post_ignore + reply_sink + (Str reply_name) + (Message.subscribe_ok (Str filter)) + (Str "") + else printf "WARNING: Bind failed <<%s>>\n%!" filter + | Message.Unsubscribe token -> + () (* %%% TODO *) + | _ -> + send_error ch "Message not understood" (Message.sexp_of_message m) + +let output_thread ch cout = + let rec loop v = + match v with + | Some (Some sexp) -> + output_sexp cout sexp; + loop (Event.poll (Event.receive ch)) + | Some None -> + () + | None -> + (* flush cout; *) + loop (Some (Event.sync (Event.receive ch))) + in loop None + +let relay_handler mtx cout n m = + Mutex.lock mtx; + output_sexp cout m; + Mutex.unlock mtx + +let relay_main peername cin cout = + printf "INFO: Accepted connection from %s\n%!" (endpoint_name peername); + output_sexp_and_flush cout (Arr [Str "hop"; Str ""]); + output_sexp_and_flush cout + (Message.subscribe (Str (Node.local_container_name()), + Str "", Str "", + Str "", Str "")); + let mtx = Mutex.create () in + let n = Node.make "relay" (relay_handler mtx cout) in + (try + while true do + dispatch_message n cout (Message.message_of_sexp (Sexp.input_sexp cin)) + done + with + | End_of_file -> + printf "INFO: Disconnecting %s normally.\n%!" (endpoint_name peername) + | Sexp.Syntax_error explanation -> + send_sexp_syntax_error cout explanation); + Node.unbind_all n + +let start_relay' (s, peername) = + let cin = in_channel_of_descr s in + let cout = out_channel_of_descr s in + connection_count := 1 + !connection_count; + relay_main peername cin cout; + connection_count := 0 + !connection_count; + flush cout; + close s + +let start_relay (s, peername) = + Util.create_thread (endpoint_name peername ^ " input") None start_relay' (s, peername) diff --git a/sexp.ml b/sexp.ml new file mode 100644 index 0000000..3191f2a --- /dev/null +++ b/sexp.ml @@ -0,0 +1,87 @@ +(* SPKI SEXP *) + +exception Syntax_error of string + +type display_hint_t = {hint : t; body : t} +and t = + | Str of string + | Hint of display_hint_t + | Arr of t list + +let _output_str ch s = + output_string ch (string_of_int (String.length s)); + output_char ch ':'; + output_string ch s + +let rec output_sexp ch x = + match x with + | Str s -> + _output_str ch s + | Hint {hint = h; body = b} -> + output_char ch '['; + output_sexp ch h; + output_char ch ']'; + output_sexp ch b + | Arr xs -> + output_char ch '('; + output_sexps ch xs; + output_char ch ')' + +and output_sexps ch xs = + match xs with + | [] -> + () + | x :: xs' -> + output_sexp ch x; + output_sexps ch xs' + +let output_sexp_and_flush ch x = + output_sexp ch x; + flush ch + +let char_numeric c = '0' <= c && c <= '9' +let char_whitespace c = c <= ' ' + +let digit_val c = (int_of_char c) - (int_of_char '0') + +let input_bytes count ch = + let buf = String.create count in (* mutable strings?!?! *) + really_input ch buf 0 count; + Str buf + +let syntax_error explanation = raise (Syntax_error explanation) + +let rec input_simple_string len ch = + match input_char ch with + | ':' -> input_bytes len ch + | b when char_numeric b -> input_simple_string (len * 10 + digit_val b) ch + | _ -> syntax_error "Bad simple-string length character" + +let rec input_sexp_list ch = + let rec collect acc = + match input_sexp_inner ch with + | None -> Arr (List.rev acc) + | Some v -> collect (v :: acc) + in collect [] + +and input_sexp_inner ch = + match input_char ch with + | '(' -> Some (input_sexp_list ch) + | ')' -> None + | '[' -> + let hint = input_simple_string 0 ch in + (match input_char ch with + | ']' -> Some (Hint {hint = hint; body = input_simple_string 0 ch}) + | _ -> syntax_error "Missing close-bracket in display hint") + | b when char_numeric b -> + Some (input_simple_string (digit_val b) ch) + | b when char_whitespace b -> + (* Convenience for testing *) + input_sexp_inner ch + | _ -> + syntax_error "Bad SEXP input character" + +let input_sexp ch = + match input_sexp_inner ch with + | None -> syntax_error "Unexpected end of list" + | Some v -> v diff --git a/subscription.ml b/subscription.ml new file mode 100644 index 0000000..4b44e3b --- /dev/null +++ b/subscription.ml @@ -0,0 +1,39 @@ +open Datastructures + +type t = { + mutable live: bool; + uuid: Uuid.t; + filter: Sexp.t; + sink: string; + name: Sexp.t + } + +type set_t = t StringMap.t ref + +let new_set () = ref StringMap.empty + +let create source subs filter sink name reply_sink reply_name = + let uuid = Uuid.create () in + let sub = { + live = true; + uuid = uuid; + filter = filter; + sink = sink; + name = name + } in + subs := StringMap.add uuid sub !subs; + Node.post_ignore reply_sink reply_name (Message.subscribe_ok (Sexp.Str uuid)) (Sexp.Str ""); + sub + +let delete source subs uuid = + (try (StringMap.find uuid !subs).live <- false + with Not_found -> ()); + subs := StringMap.remove uuid !subs + +let send_to_subscription source subs sub body = + if not sub.live + then false + else + if Node.post sub.sink sub.name body (Sexp.Str sub.uuid) + then true + else (delete source subs sub.uuid; false) diff --git a/test_fqueue.ml b/test_fqueue.ml new file mode 100644 index 0000000..3533e02 --- /dev/null +++ b/test_fqueue.ml @@ -0,0 +1,33 @@ +#use "fqueue.ml" + +let q12 = append (singleton 1) (singleton 2) +let q1234 = append (of_list [1; 2]) (of_list [3; 4]) + +let _ = + assert (length empty = 0); + assert (is_empty empty); + assert (pop_front (push_back empty 1) = Some (1, empty)); + assert (pop_back (push_front empty 1) = Some (1, empty)); + assert (to_list (of_list [1; 2; 3]) = [1; 2; 3]); + assert (length (of_list [1; 2; 3]) = 3); + assert (pop_back (of_list [1; 2; 3]) = Some (3, Q(2, [], [2; 1]))); + assert (pop_front (of_list [1; 2; 3]) = Some (1, of_list [2; 3])); + assert (to_list (push_back_all empty [1; 2; 3]) = [1; 2; 3]); + assert (to_list (push_front_all empty [1; 2; 3]) = [1; 2; 3]); + assert (to_list (push_back_all_rev empty [1; 2; 3]) = [3; 2; 1]); + assert (to_list (push_front_all_rev empty [1; 2; 3]) = [3; 2; 1]); + assert ((try slow_peek_back empty with _ -> 1) = 1); + assert ((try slow_peek_front empty with _ -> 1) = 1); + assert ((try slow_peek_back (singleton 1) with _ -> 2) = 1); + assert ((try slow_peek_front (singleton 1) with _ -> 2) = 1); + assert ((try slow_peek_back (of_list [1; 2; 3]) with _ -> 99) = 3); + assert ((try slow_peek_front (of_list [1; 2; 3]) with _ -> 99) = 1); + assert (to_list_rev (of_list [1; 2; 3]) = [3; 2; 1]); + assert (to_list (of_list_rev [1; 2; 3]) = [3; 2; 1]); + assert (to_list_rev (of_list_rev [1; 2; 3]) = [1; 2; 3]); + assert (map ((+) 1) (of_list [1; 2; 3]) = of_list [2; 3; 4]); + assert (to_list q12 = [1; 2]); + assert (fold_left (fun s v -> v :: s) [] q1234 = [4; 3; 2; 1]); + assert (fold_right (fun v s -> v :: s) [] q1234 = [1; 2; 3; 4]); + assert (fold_left (fun s v -> v :: s) [] q12 = [2; 1]); + assert (fold_right (fun v s -> v :: s) [] q12 = [1; 2]); diff --git a/util.ml b/util.ml new file mode 100644 index 0000000..776bc39 --- /dev/null +++ b/util.ml @@ -0,0 +1,18 @@ +open Printf + +let message_not_understood context m = + printf "WARNING: Message not understood in %s: " context; + Sexp.output_sexp stdout (Message.sexp_of_message m); + print_newline () + +let create_thread name cleanup main initarg = + let guarded_main initarg = + try + main initarg + with e -> + printf "WARNING: Thread <<%s>> died with %s\n%!" name (Printexc.to_string e); + (match cleanup with + | Some cleaner -> cleaner () + | None -> ()) + in + Thread.create guarded_main initarg diff --git a/uuid.ml b/uuid.ml new file mode 100644 index 0000000..74b2115 --- /dev/null +++ b/uuid.ml @@ -0,0 +1,13 @@ +type t = string + +let create () = + (* 128 bits *) + let w1 = Random.bits () in + let w2 = Random.bits () in + let w3 = Random.bits () in + let w4 = Random.bits () in + let bb = Random.int 256 in + Printf.sprintf "%08x%08x%08x%08x%02x" w1 w2 w3 w4 bb + +let init () = + Random.self_init ()