From a11fdc05005d38f4902972117d337555874cfddf Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sun, 8 Jan 2012 12:41:04 -0500 Subject: [PATCH] Initial commit --- .gitignore | 2 + Makefile | 16 ++ _tags | 2 + codegen.py | 88 ++++++ datastructures.ml | 2 + doc/Sexp.txt | 699 ++++++++++++++++++++++++++++++++++++++++++++++ factory.ml | 38 +++ fqueue.ml | 86 ++++++ messages.json | 42 +++ node.ml | 76 +++++ ocamlmsg.ml | 23 ++ queuenode.ml | 83 ++++++ relay.ml | 88 ++++++ sexp.ml | 87 ++++++ subscription.ml | 39 +++ test_fqueue.ml | 33 +++ util.ml | 18 ++ uuid.ml | 13 + 18 files changed, 1435 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 _tags create mode 100644 codegen.py create mode 100644 datastructures.ml create mode 100644 doc/Sexp.txt create mode 100644 factory.ml create mode 100644 fqueue.ml create mode 100644 messages.json create mode 100644 node.ml create mode 100644 ocamlmsg.ml create mode 100644 queuenode.ml create mode 100644 relay.ml create mode 100644 sexp.ml create mode 100644 subscription.ml create mode 100644 test_fqueue.ml create mode 100644 util.ml create mode 100644 uuid.ml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e63542 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +scratch/ +_build/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8745497 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +APP=ocamlmsg + +all: message.ml $(APP).native + +message.ml: messages.json codegen.py + python codegen.py > $@ + +clean: + ocamlbuild -clean + rm -f message.ml + +$(APP).native: $(wildcard *.ml) + ocamlbuild $@ + +run: all + ./$(APP).native diff --git a/_tags b/_tags new file mode 100644 index 0000000..809c5b8 --- /dev/null +++ b/_tags @@ -0,0 +1,2 @@ +true: use_unix +true: thread diff --git a/codegen.py b/codegen.py new file mode 100644 index 0000000..f3499d0 --- /dev/null +++ b/codegen.py @@ -0,0 +1,88 @@ +from __future__ import with_statement + +# Copyright (C) 2012 Tony Garnock-Jones. All rights reserved. +copyright_stmt = '(* Copyright (C) 2012 Tony Garnock-Jones. All rights reserved. *)' + +import sys +import json + +def mlify(s): + s = s.replace('-', '_') + s = s.replace(' ', '_') + return s + +class MessageType: + def __init__(self, j): + self.wire_selector = j['selector'] + self.selector = mlify(self.wire_selector) + self.constructor = self.selector.capitalize() + self.wire_argnames = j['args'] + self.argnames = map(mlify, self.wire_argnames) + + def format_args(self, template, separator = ', '): + return separator.join([template % (x,) for x in self.argnames]) + +with file("messages.json") as f: + spec = map(MessageType, json.load(f)) + +def print_list(o, xs, sep, c): + sys.stdout.write(o) + needsep = False + for x in xs: + if needsep: + sys.stdout.write(sep) + else: + needsep = True + sys.stdout.write(x) + sys.stdout.write(c) + +def print_codec(): + print copyright_stmt + print + print 'open Sexp' + print + print 'type t =' + for t in spec: + if t.argnames: + print ' | %s of ' % (t.constructor), + print_list('(', ['Sexp.t' for n in t.argnames], ' * ', ')\n') + else: + print ' | %s' % t.constructor + print ' | UNKNOWN of Sexp.t' + print + print 'let sexp_of_message m = match m with' + for t in spec: + sys.stdout.write(' | %s' % t.constructor) + if t.argnames: + print_list(' (', [n for n in t.argnames], ', ', ')') + print ' ->' + sys.stdout.write(' Arr [Str "%s"' % t.wire_selector) + if t.argnames: + print_list('; ', t.argnames, '; ', '') + print ']' + print ' | UNKNOWN s -> s' + print + print 'let message_of_sexp s = match s with' + for t in spec: + sys.stdout.write(' | Arr [Str "%s"' % t.wire_selector) + if t.argnames: + print_list('; ', t.argnames, '; ', '') + print '] ->' + sys.stdout.write(' %s' % t.constructor) + if t.argnames: + print_list(' (', [n for n in t.argnames], ', ', ')') + print + print ' | _ -> UNKNOWN s' + print + for t in spec: + sys.stdout.write('let %s' % t.selector) + if t.argnames: + print_list(' (', t.argnames, ', ', ')') + sys.stdout.write(' = sexp_of_message (%s' % t.constructor) + if t.argnames: + print_list(' (', t.argnames, ', ', ')') + sys.stdout.write(')\n') + print + +if __name__ == '__main__': + print_codec() diff --git a/datastructures.ml b/datastructures.ml new file mode 100644 index 0000000..be92e2a --- /dev/null +++ b/datastructures.ml @@ -0,0 +1,2 @@ +module StringSet = Set.Make(String) +module StringMap = Map.Make(String) diff --git a/doc/Sexp.txt b/doc/Sexp.txt new file mode 100644 index 0000000..caf4542 --- /dev/null +++ b/doc/Sexp.txt @@ -0,0 +1,699 @@ +Network Working Group R. Rivest +Internet Draft May 4, 1997 +Expires November 4, 1997 + + + S-Expressions + draft-rivest-sexp-00.txt + + +Status of this Memo + + Distribution of this memo is unlimited. + + This document is an Internet-Draft. Internet Drafts are working + documents of the Internet Engineering Task Force (IETF), its Areas, + and its Working Groups. Note that other groups may also distribute + working documents as Internet Drafts. + + Internet Drafts are draft documents valid for a maximum of six + months, and may be updated, replaced, or obsoleted by other documents + at any time. It is not appropriate to use Internet Drafts as + reference material, or to cite them other than as a ``working draft'' + or ``work in progress.'' + + To learn the current status of any Internet-Draft, please check the + ``1id-abstracts.txt'' listing contained in the internet-drafts Shadow + Directories on: ftp.is.co.za (Africa), nic.nordu.net (Europe), + ds.internic.net (US East Coast), ftp.isi.edu (US West Coast), + or munnari.oz.au (Pacific Rim) + + +Abstract + +This memo describes a data structure called "S-expressions" that are +suitable for representing arbitrary complex data structures. We make +precise the encodings of S-expressions: we give a "canonical form" for +S-expressions, described two "transport" representations, and also +describe an "advanced" format for display to people. + + + +1. Introduction + +S-expressions are data structures for representing complex data. They +are either byte-strings ("octet-strings") or lists of simpler +S-expressions. Here is a sample S-expression: + + (snicker "abc" (#03# |YWJj|)) + +It is a list of length three: + + -- the octet-string "snicker" + + -- the octet-string "abc" + + -- a sub-list containing two elements: + - the hexadecimal constant #03# + - the base-64 constant |YWJj| (which is the same as "abc") + +This note gives a specific proposal for constructing and utilizing +S-expressions. The proposal is independent of any particular application. + +Here are the design goals for S-expressions: + + -- generality: S-expressions should be good at representing arbitrary + data. + + -- readability: it should be easy for someone to examine and + understand the structure of an S-expression. + + -- economy: S-expressions should represent data compactly. + + -- tranportability: S-expressions should be easy to transport + over communication media (such as email) that are known to be + less than perfect. + + -- flexibility: S-expressions should make it relatively simple to + modify and extend data structures. + + -- canonicalization: it should be easy to produce a unique + "canonical" form of an S-expression, for digital signature purposes. + + -- efficiency: S-expressions should admit in-memory representations + that allow efficient processing. + + +Section 2 gives an introduction to S-expressions. +Section 3 discusses the character sets used. +Section 4 presents the various representations of octet-strings. +Section 5 describes how to represent lists. +Section 6 discusses how S-expressions are represented for various uses. +Section 7 gives a BNF syntax for S-expressions. +Section 8 talks about how S-expressions might be represented in memory. +Section 9 briefly describes implementations for handling S-expressions. +Section 10 discusses how applications might utilize S-expressions. +Section 11 gives historical notes on S-expressions. +Section 12 gives references. + +2. S-expressions -- informal introduction + +Informally, an S-expression is either: + -- an octet-string, or + -- a finite list of simpler S-expressions. + +An octet-string is a finite sequence of eight-bit octets. There may be +many different but equivalent ways of representing an octet-string + + abc -- as a token + + "abc" -- as a quoted string + + #616263# -- as a hexadecimal string + + 3:abc -- as a length-prefixed "verbatim" encoding + + {MzphYmM=} -- as a base-64 encoding of the verbatim encoding + (that is, an encoding of "3:abc") + + |YWJj| -- as a base-64 encoding of the octet-string "abc" + +These encodings are all equivalent; they all denote the same octet string. + +We will give details of these encodings later on, and also describe how to +give a "display type" to a byte string. + +A list is a finite sequence of zero or more simpler S-expressions. A list +may be represented by using parentheses to surround the sequence of encodings +of its elements, as in: + + (abc (de #6667#) "ghi jkl") + +As we see, there is variability possible in the encoding of an +S-expression. In some cases, it is desirable to standardize or +restrict the encodings; in other cases it is desirable to have no +restrictions. The following are the target cases we aim to handle: + + -- a "transport" encoding for transporting the S-expression between + computers. + + -- a "canonical" encoding, used when signing the S-expression. + + -- an "advanced" encoding used for input/output to people. + + -- an "in-memory" encoding used for processing the S-expression in + the computer. + +These need not be different; in this proposal the canonical encoding +is the same as the transport encoding, for example. In this note we +propose (related) encoding techniques for each of these uses. + +3. Character set + +We will be describing encodings of S-expressions. Except when giving +"verbatim" encodings, the character set used is limited to the following +characters in US-ASCII: + Alphabetic: A B ... Z a b ... z + numeric: 0 1 ... 9 + whitespace: space, horizontal tab, vertical tab, form-feed + carriage-return, line-feed + The following graphics characters, which we call "pseudo-alphabetic": + - hyphen or minus + . period + / slash + _ underscore + : colon + * asterisk + + plus + = equal + The following graphics characters, which are "reserved punctuation": + ( left parenthesis + ) right parenthesis + [ left bracket + ] right bracket + { left brace + } right brace + | vertical bar + # number sign + " double quote + & ampersand + \ backslash + The following characters are unused and unavailable, except in + "verbatim" encodings: + ! exclamation point + % percent + ^ circumflex + ~ tilde + ; semicolon + ' apostrophe + , comma + < less than + > greater than + ? question mark + + +4. Octet string representations + +This section describes in detail the ways in which an octet-string may +be represented. + +We recall that an octet-string is any finite sequence of octets, and +that the octet-string may have length zero. + + +4.1 Verbatim representation + +A verbatim encoding of an octet string consists of four parts: + + -- the length (number of octets) of the octet-string, + given in decimal most significant digit first, with + no leading zeros. + + -- a colon ":" + + -- the octet string itself, verbatim. + +There are no blanks or whitespace separating the parts. No "escape +sequences" are interpreted in the octet string. This encoding is also +called a "binary" or "raw" encoding. + +Here are some sample verbatim encodings: + + 3:abc + 7:subject + 4::::: + 12:hello world! + 10:abcdefghij + 0: + +4.2 Quoted-string representation + +The quoted-string representation of an octet-string consists of: + + -- an optional decimal length field + + -- an initial double-quote (") + + -- the octet string with "C" escape conventions (\n,etc) + + -- a final double-quote (") + +The specified length is the length of the resulting string after any +escape sequences have been handled. The string does not have any +"terminating NULL" that C includes, and the length does not count such +a character. + +The length is optional. + +The escape conventions within the quoted string are as follows (these follow +the "C" programming language conventions, with an extension for +ignoring line terminators of just LF or CRLF): + \b -- backspace + \t -- horizontal tab + \v -- vertical tab + \n -- new-line + \f -- form-feed + \r -- carriage-return + \" -- double-quote + \' -- single-quote + \\ -- back-slash + \ooo -- character with octal value ooo (all three digits + must be present) + \xhh -- character with hexadecimal value hh (both digits + must be present) + \ -- causes carriage-return to be ignored. + \ -- causes linefeed to be ignored + \ -- causes CRLF to be ignored. + \ -- causes LFCR to be ignored. + +Here are some examples of quoted-string encodings: + + "subject" + "hi there" + 7"subject" + 3"\n\n\n" + "This has\n two lines." + "This has\ + one." + "" + +4.3 Token representation + +An octet string that meets the following conditions may be given +directly as a "token". + + -- it does not begin with a digit + + -- it contains only characters that are + -- alphabetic (upper or lower case), + -- numeric, or + -- one of the eight "pseudo-alphabetic" punctuation marks: + - . / _ : * + = + (Note: upper and lower case are not equivalent.) + (Note: A token may begin with punctuation, including ":"). + +Here are some examples of token representations: + + subject + not-before + class-of-1997 + //microsoft.com/names/smith + * + + +4.4 Hexadecimal representation + +An octet-string may be represented with a hexadecimal encoding consisting of: + + -- an (optional) decimal length of the octet string + + -- a sharp-sign "#" + + -- a hexadecimal encoding of the octet string, with each octet + represented with two hexadecimal digits, most significant + digit first. + + -- a sharp-sign "#" + +There may be whitespace inserted in the midst of the hexadecimal +encoding arbitrarily; it is ignored. It is an error to have +characters other than whitespace and hexadecimal digits. + +Here are some examples of hexadecimal encodings: + + #616263# -- represents "abc" + 3#616263# -- also represents "abc" + # 616 + 263 # -- also represents "abc" + + +4.5 Base-64 representation + +An octet-string may be represented in a base-64 coding consisting of: + + -- an (optional) decimal length of the octet string + + -- a vertical bar "|" + + -- the rfc 1521 base-64 encoding of the octet string. + + -- a final vertical bar "|" + +The base-64 encoding uses only the characters + A-Z a-z 0-9 + / = +It produces four characters of output for each three octets of input. +If the input has one or two left-over octets of input, it produces an +output block of length four ending in two or one equals signs, respectively. +Output routines compliant with this standard MUST output the equals signs +as specified. Input routines MAY accept inputs where the equals signs are +dropped. + +There may be whitespace inserted in the midst of the base-64 encoding +arbitrarily; it is ignored. It is an error to have characters other +than whitespace and base-64 characters. + +Here are some examples of base-64 encodings: + + |YWJj| -- represents "abc" + | Y W + J j | -- also represents "abc" + 3|YWJj| -- also represents "abc" + |YWJjZA==| -- represents "abcd" + |YWJjZA| -- also represents "abcd" + + +4.6 Display hint + +Any octet string may be preceded by a single "display hint". + +The purposes of the display hint is to provide information on how +to display the octet string to a user. It has no other function. +Many of the MIME types work here. + +A display-hint is an octet string surrounded by square brackets. +There may be whitespace separating the octet string from the +surrounding brackets. Any of the legal formats may be used for the +octet string. + +Here are some examples of display-hints: + + [image/gif] + [URI] + [charset=unicode-1-1] + [text/richtext] + [application/postscript] + [audio/basic] + ["http://abc.com/display-types/funky.html"] + +In applications an octet-string that is untyped may be considered to have +a pre-specified "default" mime type. The mime type + "text/plain; charset=iso-8859-1" +is the standard default. + + +4.7 Equality of octet-strings + +Two octet strings are considered to be "equal" if and only if they +have the same display hint and the same data octet strings. + +Note that octet-strings are "case-sensitive"; the octet-string "abc" +is not equal to the octet-string "ABC". + +An untyped octet-string can be compared to another octet-string (typed +or not) by considering it as a typed octet-string with the default +mime-type. + + +5. Lists + +Just as with octet-strings, there are several ways to represent an +S-expression. Whitespace may be used to separate list elements, but +they are only required to separate two octet strings when otherwise +the two octet strings might be interpreted as one, as when one token +follows another. Also, whitespace may follow the initial left +parenthesis, or precede the final right parenthesis. + +Here are some examples of encodings of lists: + + (a b c) + + ( a ( b c ) ( ( d e ) ( e f ) ) ) + + (11:certificate(6:issuer3:bob)(7:subject5:alice)) + + ({3Rt=} "1997" murphy 3:{XC++}) + + +6. Representation types + +There are three "types" of representations: + + -- canonical + + -- basic transport + + -- advanced transport + +The first two MUST be supported by any implementation; the last is +optional. + + +6.1 Canonical representation + +This canonical representation is used for digital signature purposes, +transmission, etc. It is uniquely defined for each S-expression. It +is not particularly readable, but that is not the point. It is +intended to be very easy to parse, to be reasonably economical, and to +be unique for any S-expression. + +The "canonical" form of an S-expression represents each octet-string +in verbatim mode, and represents each list with no blanks separating +elements from each other or from the surrounding parentheses. + +Here are some examples of canonical representations of S-expressions: + + (6:issuer3:bob) + + (4:icon[12:image/bitmap]9:xxxxxxxxx) + + (7:subject(3:ref5:alice6:mother)) + + +6.2 Basic transport representation + +There are two forms of the "basic transport" representation: + + -- the canonical representation + + -- an rfc-2045 base-64 representation of the canonical representation, + surrounded by braces. + +The transport mechanism is intended to provide a universal means of +representing S-expressions for transport from one machine to another. + +Here are some examples of an S-expression represented in basic +transport mode: + + (1:a1:b1:c) + + {KDE6YTE6YjE6YykA} + + (this is the same S-expression encoded in base-64) + +There is a difference between the brace notation for base-64 used here +and the || notation for base-64'd octet-strings described above. Here +the base-64 contents are converted to octets, and then re-scanned as +if they were given originally as octets. With the || notation, the +contents are just turned into an octet-string. + + +6.3 Advanced transport representation + +The "advanced transport" representation is intended to provide more +flexible and readable notations for documentation, design, debugging, +and (in some cases) user interface. + +The advanced transport representation allows all of the representation +forms described above, include quoted strings, base-64 and hexadecimal +representation of strings, tokens, representations of strings with +omitted lengths, and so on. + + +7. BNF for syntax + +We give separate BNF's for canonical and advanced forms of S-expressions. +We use the following notation: + * means 0 or more occurrences of + + means 1 or more occurrences of + ? means 0 or 1 occurrences of + parentheses are used for grouping, as in ( | )* + +For canonical and basic transport: + + :: | + :: ? ; + :: ; + :: "[" "]" ; + :: ":" ; + :: + ; + -- decimal numbers should have no unnecessary leading zeros + -- any string of bytes, of the indicated length + :: "(" * ")" ; + :: "0" | ... | "9" ; + +For advanced transport: + + :: | + :: ? ; + :: | | | | + ; + :: "[" "]" ; + :: ":" ; + :: + ; + -- decimal numbers should have no unnecessary leading zeros + -- any string of bytes, of the indicated length + :: + ; + :: ? "|" ( | )* "|" ; + :: "#" ( | )* "#" ; + :: ? + :: "\"" "\"" + :: "(" ( | )* ")" ; + :: * ; + :: | | ; + :: | | ; + :: "a" | ... | "z" ; + :: "A" | ... | "Z" ; + :: "0" | ... | "9" ; + :: | "A" | ... | "F" | "a" | ... | "f" ; + :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ; + :: " " | "\t" | "\r" | "\n" ; + :: | | "+" | "/" | "=" ; + :: "" ; + +8. In-memory representations + +For processing, the S-expression would typically be parsed and represented +in memory in a more more amenable to efficient processing. We suggest +two alternatives: + + -- "list-structure" + + -- "array-layout" + +We only sketch these here, as they are only suggestive. The code referenced +below illustrates these styles in more detail. + + +8.1. List-structure memory representation + +Here there are separate records for simple-strings, strings, and +lists. An S-expression of the form ("abc" "de") would require two +records for the simple strings, two for the strings, and two for the +list elements. This is a fairly conventional representation, and +details are omitted here. + +8.2 Array-layout memory representation + +Here each S-expression is represented as a contiguous array of bytes. +The first byte codes the "type" of the S-expression: + + 01 octet-string + + 02 octet-string with display-hint + + 03 beginning of list (and 00 is used for "end of list") + +Each of the three types is immediately followed by a k-byte integer +indicating the size (in bytes) of the following representation. Here +k is an integer that depends on the implementation, it might be +anywhere from 2 to 8, but would be fixed for a given implementation; +it determines the size of the objects that can be handled. The transport +and canonical representations are independent of the choice of k made by +the implementation. + +Although the length of lists are not given in the usual S-expression +notations, it is easy to fill them in when parsing; when you reach a +right-parenthesis you know how long the list representation was, and +where to go back to fill in the missing length. + + +8.2.1 Octet string + +This is represented as follows: + + 01 + +For example (here k = 2) + + 01 0003 a b c + +8.2.2 Octet-string with display-hint + +This is represented as follows: + + 02 + 01 /* for display-type */ + 01 /* for octet-string */ + +For example, the S-expression + + [gif] #61626364# + +would be represented as (with k = 2) + + 02 000d + 01 0003 g i f + 01 0004 61 62 63 64 + +8.2.3 List + +This is represented as + + 03 ... 00 + +For example, the list (abc [d]ef (g)) is represented in memory as (with k=2) + + 03 001b + 01 0003 a b c + 02 0009 + 01 0001 d + 01 0002 e f + 03 0005 + 01 0001 g + 00 + 00 + +9. Code + +There is code available for reading and parsing the various +S-expression formats proposed here. + +See http://theory.lcs.mit.edu/~rivest/sexp.html + + +10. Utilization of S-expressions + +This note has described S-expressions in general form. Application writers +may wish to restrict their use of S-expressions in various ways. Here are +some possible restrictions that might be considered: + + -- no display-hints + -- no lengths on hexadecimal, quoted-strings, or base-64 encodings + -- no empty lists + -- no empty octet-strings + -- no lists having another list as its first element + -- no base-64 or hexadecimal encodings + -- fixed limits on the size of octet-strings + +11. Historical note + +The S-expression technology described here was originally developed +for ``SDSI'' (the Simple Distributed Security Infrastructure by +Lampson and Rivest [SDSI]) in 1996, although the origins clearly date +back to McCarthy's LISP programming language. It was further refined +and improved during the merger of SDSI and SPKI [SPKI] during the +first half of 1997. S-expressions are similar to, but more readable +and flexible than, Bernstein's "net-strings" [BERN]. + +12. References + +[SDSI] "A Simple Distributed Security Architecture", by + Butler Lampson, and Ronald L. Rivest + http://theory.lcs.mit.edu/~cis/sdsi.html + +[SPKI] SPKI--A + Simple Public Key Infrastructure + +[BERN] Dan Bernstein's "net-strings"; Internet Draft + draft-bernstein-netstrings-02.txt + +Author's Address + + Ronald L. Rivest + Room 324, 545 Technology Square + MIT Laboratory for Computer Science + Cambridge, MA 02139 + + rivest@theory.lcs.mit.edu + + diff --git a/factory.ml b/factory.ml new file mode 100644 index 0000000..3eef79c --- /dev/null +++ b/factory.ml @@ -0,0 +1,38 @@ +open Printf +open Sexp +open Datastructures + +type factory_t = Sexp.t -> Sexp.t option + +let classes = ref StringMap.empty + +let register_class name factory = + if StringMap.mem name !classes + then (printf "ERROR: Duplicate node class name %s\n%!" name; + exit 1) + else classes := StringMap.add name factory !classes + +let lookup_class name = + try Some (StringMap.find name !classes) + with Not_found -> None + +let factory_handler n sexp = + match Message.message_of_sexp sexp with + | Message.Create (Str classname, arg, Str reply_sink, Str reply_name) -> + (match lookup_class classname with + | Some factory -> + let reply = + match factory arg with + | None -> + Message.create_ok + | Some explanation -> + Message.create_failed explanation + in + Node.post_ignore reply_sink (Str reply_name) reply (Str "") + | None -> + printf "WARNING: Node class not found <<%s>>\n%!" classname) + | m -> + Util.message_not_understood "factory" m + +let init () = + Node.bind_ignore ("factory", Node.make "factory" factory_handler) diff --git a/fqueue.ml b/fqueue.ml new file mode 100644 index 0000000..fc5153b --- /dev/null +++ b/fqueue.ml @@ -0,0 +1,86 @@ +(* Functional queue. *) + +type 'a t = Q of int * 'a list * 'a list + +let empty = Q (0, [], []) + +let singleton v = Q (1, [], [v]) + +let length (Q (n, _, _)) = n + +let is_empty q = (length q = 0) + +let push_back (Q (n, front, back)) v = Q (n + 1, front, v :: back) +let push_front (Q (n, front, back)) v = Q (n + 1, v :: front, back) + +let push_back_all (Q (n, front, back)) vs = Q (n + List.length vs, front, List.rev_append vs back) +let push_front_all (Q (n, front, back)) vs = Q (n + List.length vs, List.append vs front, back) + +let push_back_all_rev (Q (n, front, back)) vs = + Q (n + List.length vs, front, List.append vs back) +let push_front_all_rev (Q (n, front, back)) vs = + Q (n + List.length vs, List.rev_append vs front, back) + +let pop_ remote local = + match local with + | [] -> + (match List.rev remote with + | [] -> None + | v :: rest -> Some (v, [], rest)) + | v :: rest -> Some (v, remote, rest) + +let pop_back (Q (n, front, back)) = + match pop_ front back with + | Some (v, front', back') -> Some (v, Q (n - 1, front', back')) + | None -> None + +let pop_front (Q (n, front, back)) = + match pop_ back front with + | Some (v, back', front') -> Some (v, Q (n - 1, front', back')) + | None -> None + +let peek_back (Q (n, front, back)) = + match pop_ front back with + | Some (v, front', back') -> Some (v, Q (n - 1, front', v :: back')) + | None -> None + +let peek_front (Q (n, front, back)) = + match pop_ back front with + | Some (v, back', front') -> Some (v, Q (n - 1, v :: front', back')) + | None -> None + +let unsome x = + match x with + | Some v -> v + | None -> raise Not_found + +let really_pop_back q = unsome (pop_back q) +let really_pop_front q = unsome (pop_front q) + +let really_peek_back q = unsome (peek_back q) +let really_peek_front q = unsome (peek_front q) + +let slow_peek_back q = let (v, _) = unsome (peek_back q) in v +let slow_peek_front q = let (v, _) = unsome (peek_front q) in v + +let of_list vs = Q (List.length vs, vs, []) +let of_list_rev vs = Q (List.length vs, [], vs) + +let to_list (Q (_, front, back)) = List.append front (List.rev back) +let to_list_rev (Q (_, front, back)) = List.append back (List.rev front) + +(* Warning: doesn't operate in order *) +let map f (Q (n, front, back)) = Q (n, List.map f front, List.rev_map f back) + +let append (Q (n1, front1, back1)) (Q (n2, front2, back2)) = + Q (n1 + n2, front1 @ List.rev_append back1 front2, back2) + +let iter f (Q (_, front, back)) = + List.iter f front; + List.iter f (List.rev back) + +let fold_left f seed (Q (_, front, back)) = + List.fold_right (fun v s -> f s v) back (List.fold_left f seed front) + +let fold_right f seed (Q (_, front, back)) = + List.fold_right f front (List.fold_left (fun s v -> f v s) seed back) diff --git a/messages.json b/messages.json new file mode 100644 index 0000000..163aab7 --- /dev/null +++ b/messages.json @@ -0,0 +1,42 @@ +[ + { + "selector": "create", + "args": ["classname", "arg", "reply-sink", "reply-name"] + }, + { + "selector": "create-ok", + "args": [] + }, + { + "selector": "create-failed", + "args": ["reason"] + }, + { + "selector": "subscribed", + "args": ["source", "filter", "sink", "name"] + }, + { + "selector": "unsubscribed", + "args": ["source", "filter", "sink", "name"] + }, + { + "selector": "post", + "args": ["name", "body", "token"] + }, + { + "selector": "subscribe", + "args": ["filter", "sink", "name", "reply_sink", "reply_name"] + }, + { + "selector": "subscribe-ok", + "args": ["token"] + }, + { + "selector": "unsubscribe", + "args": ["token"] + }, + { + "selector": "error", + "args": ["message", "details"] + } +] diff --git a/node.ml b/node.ml new file mode 100644 index 0000000..05da942 --- /dev/null +++ b/node.ml @@ -0,0 +1,76 @@ +open Printf +open Datastructures + +type handle_message_t = t -> Sexp.t -> unit +and t = { + mutable names: StringSet.t; + class_name: string; + handle_message: handle_message_t + } + +let directory = ref StringMap.empty + +let local_container_name () = "server" + +let make class_name handler = { + names = StringSet.empty; + class_name = class_name; + handle_message = handler +} + +let lookup name = + try Some (StringMap.find name !directory) + with Not_found -> None + +let bind (filter, node) = + if filter = "" + then (printf "WARNING: Binding to empty name forbidden\n%!"; false) + else + if StringMap.mem filter !directory + then false + else (directory := StringMap.add filter node !directory; + node.names <- StringSet.add filter node.names; + printf "INFO: Binding node <<%s>> of class %s\n%!" filter node.class_name; + true) + +(* For use in factory constructor functions, hence the odd return type and values *) +let make_named class_name node_name handler = + let node = make class_name handler in + if bind (node_name, node) then None else Some (Sexp.Str "bind-failed") + +let unbind name = + match lookup name with + | Some n -> + printf "INFO: Unbinding node <<%s>> of class %s\n%!" name n.class_name; + n.names <- StringSet.remove name n.names; + directory := StringMap.remove name !directory; + true + | None -> + false + +let unbind_all n = + StringSet.iter (fun name -> ignore (unbind name)) n.names; + n.names <- StringSet.empty + +let send name body = + match lookup name with + | Some n -> n.handle_message n body; true + | None -> false + +let post name label body token = + send name (Message.post (label, body, token)) + +let bind_ignore (filter, node) = + if bind (filter, node) + then () + else printf "WARNING: Duplicate binding <<%s>>\n%!" filter + +let send_ignore name body = + if send name body + then () + else (printf "WARNING: send to missing node %s: " name; + Sexp.output_sexp Pervasives.stdout body; + print_newline ()) + +let post_ignore name label body token = + send_ignore name (Message.post (label, body, token)) diff --git a/ocamlmsg.ml b/ocamlmsg.ml new file mode 100644 index 0000000..0372446 --- /dev/null +++ b/ocamlmsg.ml @@ -0,0 +1,23 @@ +open Unix +open Printf +open Thread + +let rec accept_loop sock = + let (s, peername) = accept sock in + ignore (Relay.start_relay (s, peername)); + accept_loop sock + +let start_net port_number = + let sock = socket PF_INET SOCK_STREAM 0 in + setsockopt sock SO_REUSEADDR true; + bind sock (ADDR_INET (inet_addr_of_string "0.0.0.0", port_number)); + listen sock 5; + printf "INFO: Accepting connections on port %d.\n%!" port_number; + accept_loop sock + +let _ = + printf "ocamlmsg ALPHA, Copyright (C) 2012 Tony Garnock-Jones. All rights reserved.\n%!"; + Uuid.init (); + Factory.init (); + Queuenode.init (); + start_net 5671 diff --git a/queuenode.ml b/queuenode.ml new file mode 100644 index 0000000..e49e907 --- /dev/null +++ b/queuenode.ml @@ -0,0 +1,83 @@ +open Sexp + +type t = { + name: string; + subscriptions: Subscription.set_t; + ch: Message.t Event.channel; + mutable backlog: Sexp.t Fqueue.t; + mutable waiters: Subscription.t Fqueue.t; + } + +let classname = "queue" + +let rec do_burst info n = + (* + Printf.printf "INFO: do_burst %d backlog %d waiters %d ticks left\n%!" + (Fqueue.length info.backlog) + (Fqueue.length info.waiters) + n; + *) + if Fqueue.is_empty info.backlog then false + else + if Fqueue.is_empty info.waiters then false + else + if n = 0 then true (* maybe more work available, but should poll for outside events *) + else + let (body, new_backlog) = Fqueue.really_pop_front info.backlog in + let (sub, new_waiters) = Fqueue.really_pop_front info.waiters in + info.waiters <- new_waiters; + if Subscription.send_to_subscription info.name info.subscriptions sub body + then + (info.waiters <- Fqueue.push_back info.waiters sub; + info.backlog <- new_backlog; + do_burst info (n - 1)) + else + do_burst info n + +let rec process_and_wait info = + if not (do_burst info 1000) + then Event.sync (Event.receive info.ch) + else + match Event.poll (Event.receive info.ch) with + | Some m -> m + | None -> process_and_wait info + +let shoveller info = + let rec loop () = + match process_and_wait info with + | Message.Post (name, body, token) -> + info.backlog <- Fqueue.push_back info.backlog body; + loop () + | Message.Subscribe (filter, Str sink, name, Str reply_sink, reply_name) -> + let sub = + Subscription.create info.name info.subscriptions + filter sink name reply_sink reply_name + in + info.waiters <- Fqueue.push_back info.waiters sub; + loop () + | Message.Unsubscribe (Str token) -> + Subscription.delete info.name info.subscriptions token; + loop () + | m -> + Util.message_not_understood "queue" m; + loop () + in loop () + +let queue_factory arg = + match arg with + | (Arr [Str name]) -> + let info = { + name = name; + subscriptions = Subscription.new_set (); + ch = Event.new_channel (); + backlog = Fqueue.empty; + waiters = Fqueue.empty + } in + ignore (Util.create_thread name None shoveller info); + let queue_handler n sexp = Event.sync (Event.send info.ch (Message.message_of_sexp sexp)) in + Node.make_named classname name queue_handler + | _ -> + Some (Str "bad-arg") + +let init () = + Factory.register_class classname queue_factory diff --git a/relay.ml b/relay.ml new file mode 100644 index 0000000..8601e8b --- /dev/null +++ b/relay.ml @@ -0,0 +1,88 @@ +open Unix +open Printf +open Thread +open Sexp + +let connection_count = ref 0 + +let endpoint_name n = + match n with + | ADDR_INET (host, port) -> sprintf "%s:%d" (string_of_inet_addr host) port + | _ -> "??unknown??" + +let send_error ch message details = + let m = Message.error (Str message, details) in + print_string "WARNING: Sending error: "; + output_sexp Pervasives.stdout m; + print_newline (); + output_sexp_and_flush ch m + +let send_sexp_syntax_error ch explanation = + send_error ch explanation (Str "http://people.csail.mit.edu/rivest/Sexp.txt") + +let dispatch_message n ch m = + match m with + | Message.Post (Str name, body, token) -> + Node.send_ignore name body + | Message.Subscribe (Str filter, sink, name, Str reply_sink, Str reply_name) -> + if Node.bind(filter, n) + then Node.post_ignore + reply_sink + (Str reply_name) + (Message.subscribe_ok (Str filter)) + (Str "") + else printf "WARNING: Bind failed <<%s>>\n%!" filter + | Message.Unsubscribe token -> + () (* %%% TODO *) + | _ -> + send_error ch "Message not understood" (Message.sexp_of_message m) + +let output_thread ch cout = + let rec loop v = + match v with + | Some (Some sexp) -> + output_sexp cout sexp; + loop (Event.poll (Event.receive ch)) + | Some None -> + () + | None -> + (* flush cout; *) + loop (Some (Event.sync (Event.receive ch))) + in loop None + +let relay_handler mtx cout n m = + Mutex.lock mtx; + output_sexp cout m; + Mutex.unlock mtx + +let relay_main peername cin cout = + printf "INFO: Accepted connection from %s\n%!" (endpoint_name peername); + output_sexp_and_flush cout (Arr [Str "hop"; Str ""]); + output_sexp_and_flush cout + (Message.subscribe (Str (Node.local_container_name()), + Str "", Str "", + Str "", Str "")); + let mtx = Mutex.create () in + let n = Node.make "relay" (relay_handler mtx cout) in + (try + while true do + dispatch_message n cout (Message.message_of_sexp (Sexp.input_sexp cin)) + done + with + | End_of_file -> + printf "INFO: Disconnecting %s normally.\n%!" (endpoint_name peername) + | Sexp.Syntax_error explanation -> + send_sexp_syntax_error cout explanation); + Node.unbind_all n + +let start_relay' (s, peername) = + let cin = in_channel_of_descr s in + let cout = out_channel_of_descr s in + connection_count := 1 + !connection_count; + relay_main peername cin cout; + connection_count := 0 + !connection_count; + flush cout; + close s + +let start_relay (s, peername) = + Util.create_thread (endpoint_name peername ^ " input") None start_relay' (s, peername) diff --git a/sexp.ml b/sexp.ml new file mode 100644 index 0000000..3191f2a --- /dev/null +++ b/sexp.ml @@ -0,0 +1,87 @@ +(* SPKI SEXP *) + +exception Syntax_error of string + +type display_hint_t = {hint : t; body : t} +and t = + | Str of string + | Hint of display_hint_t + | Arr of t list + +let _output_str ch s = + output_string ch (string_of_int (String.length s)); + output_char ch ':'; + output_string ch s + +let rec output_sexp ch x = + match x with + | Str s -> + _output_str ch s + | Hint {hint = h; body = b} -> + output_char ch '['; + output_sexp ch h; + output_char ch ']'; + output_sexp ch b + | Arr xs -> + output_char ch '('; + output_sexps ch xs; + output_char ch ')' + +and output_sexps ch xs = + match xs with + | [] -> + () + | x :: xs' -> + output_sexp ch x; + output_sexps ch xs' + +let output_sexp_and_flush ch x = + output_sexp ch x; + flush ch + +let char_numeric c = '0' <= c && c <= '9' +let char_whitespace c = c <= ' ' + +let digit_val c = (int_of_char c) - (int_of_char '0') + +let input_bytes count ch = + let buf = String.create count in (* mutable strings?!?! *) + really_input ch buf 0 count; + Str buf + +let syntax_error explanation = raise (Syntax_error explanation) + +let rec input_simple_string len ch = + match input_char ch with + | ':' -> input_bytes len ch + | b when char_numeric b -> input_simple_string (len * 10 + digit_val b) ch + | _ -> syntax_error "Bad simple-string length character" + +let rec input_sexp_list ch = + let rec collect acc = + match input_sexp_inner ch with + | None -> Arr (List.rev acc) + | Some v -> collect (v :: acc) + in collect [] + +and input_sexp_inner ch = + match input_char ch with + | '(' -> Some (input_sexp_list ch) + | ')' -> None + | '[' -> + let hint = input_simple_string 0 ch in + (match input_char ch with + | ']' -> Some (Hint {hint = hint; body = input_simple_string 0 ch}) + | _ -> syntax_error "Missing close-bracket in display hint") + | b when char_numeric b -> + Some (input_simple_string (digit_val b) ch) + | b when char_whitespace b -> + (* Convenience for testing *) + input_sexp_inner ch + | _ -> + syntax_error "Bad SEXP input character" + +let input_sexp ch = + match input_sexp_inner ch with + | None -> syntax_error "Unexpected end of list" + | Some v -> v diff --git a/subscription.ml b/subscription.ml new file mode 100644 index 0000000..4b44e3b --- /dev/null +++ b/subscription.ml @@ -0,0 +1,39 @@ +open Datastructures + +type t = { + mutable live: bool; + uuid: Uuid.t; + filter: Sexp.t; + sink: string; + name: Sexp.t + } + +type set_t = t StringMap.t ref + +let new_set () = ref StringMap.empty + +let create source subs filter sink name reply_sink reply_name = + let uuid = Uuid.create () in + let sub = { + live = true; + uuid = uuid; + filter = filter; + sink = sink; + name = name + } in + subs := StringMap.add uuid sub !subs; + Node.post_ignore reply_sink reply_name (Message.subscribe_ok (Sexp.Str uuid)) (Sexp.Str ""); + sub + +let delete source subs uuid = + (try (StringMap.find uuid !subs).live <- false + with Not_found -> ()); + subs := StringMap.remove uuid !subs + +let send_to_subscription source subs sub body = + if not sub.live + then false + else + if Node.post sub.sink sub.name body (Sexp.Str sub.uuid) + then true + else (delete source subs sub.uuid; false) diff --git a/test_fqueue.ml b/test_fqueue.ml new file mode 100644 index 0000000..3533e02 --- /dev/null +++ b/test_fqueue.ml @@ -0,0 +1,33 @@ +#use "fqueue.ml" + +let q12 = append (singleton 1) (singleton 2) +let q1234 = append (of_list [1; 2]) (of_list [3; 4]) + +let _ = + assert (length empty = 0); + assert (is_empty empty); + assert (pop_front (push_back empty 1) = Some (1, empty)); + assert (pop_back (push_front empty 1) = Some (1, empty)); + assert (to_list (of_list [1; 2; 3]) = [1; 2; 3]); + assert (length (of_list [1; 2; 3]) = 3); + assert (pop_back (of_list [1; 2; 3]) = Some (3, Q(2, [], [2; 1]))); + assert (pop_front (of_list [1; 2; 3]) = Some (1, of_list [2; 3])); + assert (to_list (push_back_all empty [1; 2; 3]) = [1; 2; 3]); + assert (to_list (push_front_all empty [1; 2; 3]) = [1; 2; 3]); + assert (to_list (push_back_all_rev empty [1; 2; 3]) = [3; 2; 1]); + assert (to_list (push_front_all_rev empty [1; 2; 3]) = [3; 2; 1]); + assert ((try slow_peek_back empty with _ -> 1) = 1); + assert ((try slow_peek_front empty with _ -> 1) = 1); + assert ((try slow_peek_back (singleton 1) with _ -> 2) = 1); + assert ((try slow_peek_front (singleton 1) with _ -> 2) = 1); + assert ((try slow_peek_back (of_list [1; 2; 3]) with _ -> 99) = 3); + assert ((try slow_peek_front (of_list [1; 2; 3]) with _ -> 99) = 1); + assert (to_list_rev (of_list [1; 2; 3]) = [3; 2; 1]); + assert (to_list (of_list_rev [1; 2; 3]) = [3; 2; 1]); + assert (to_list_rev (of_list_rev [1; 2; 3]) = [1; 2; 3]); + assert (map ((+) 1) (of_list [1; 2; 3]) = of_list [2; 3; 4]); + assert (to_list q12 = [1; 2]); + assert (fold_left (fun s v -> v :: s) [] q1234 = [4; 3; 2; 1]); + assert (fold_right (fun v s -> v :: s) [] q1234 = [1; 2; 3; 4]); + assert (fold_left (fun s v -> v :: s) [] q12 = [2; 1]); + assert (fold_right (fun v s -> v :: s) [] q12 = [1; 2]); diff --git a/util.ml b/util.ml new file mode 100644 index 0000000..776bc39 --- /dev/null +++ b/util.ml @@ -0,0 +1,18 @@ +open Printf + +let message_not_understood context m = + printf "WARNING: Message not understood in %s: " context; + Sexp.output_sexp stdout (Message.sexp_of_message m); + print_newline () + +let create_thread name cleanup main initarg = + let guarded_main initarg = + try + main initarg + with e -> + printf "WARNING: Thread <<%s>> died with %s\n%!" name (Printexc.to_string e); + (match cleanup with + | Some cleaner -> cleaner () + | None -> ()) + in + Thread.create guarded_main initarg diff --git a/uuid.ml b/uuid.ml new file mode 100644 index 0000000..74b2115 --- /dev/null +++ b/uuid.ml @@ -0,0 +1,13 @@ +type t = string + +let create () = + (* 128 bits *) + let w1 = Random.bits () in + let w2 = Random.bits () in + let w3 = Random.bits () in + let w4 = Random.bits () in + let bb = Random.int 256 in + Printf.sprintf "%08x%08x%08x%08x%02x" w1 w2 w3 w4 bb + +let init () = + Random.self_init ()