From 4f2e19b0b20e0b58aa4bfad45dc22b07b6812984 Mon Sep 17 00:00:00 2001
From: Emery Hemingway
Date: Fri, 9 Feb 2024 15:24:45 +0000
Subject: [PATCH] Add XSLT processor

---
 README.md                     |  22 ++++
 assertions.prs                |   4 +
 config.prs                    |   4 +
 shell.nix                     |   5 +-
 src/schema/assertions.nim     |  13 ++-
 src/schema/config.nim         |   8 ++
 src/syndesizer.nim            |   4 +-
 src/syndesizer/xslt_actor.nim | 256 ++++++++++++++++++++++++++++++++++
 syndicate_utils.nimble        |   2 +-
 9 files changed, 312 insertions(+), 6 deletions(-)
 create mode 100644 src/syndesizer/xslt_actor.nim

diff --git a/README.md b/README.md
index 8d97b1b..9179314 100644
--- a/README.md
+++ b/README.md
@@ -270,6 +270,28 @@ Examples:
 ]
 ```
 
+### XSLT processor
+
+Perform XML stylesheet transformations. For a given textual XSLT stylesheet and a textual XML document generate an abstract XML document in Preserves form. Inputs may be XML text or paths to XML files.
+
+```
+# Configuration example
+let ?ds = dataspace
+$ds [
+  ? [
+    ? [
+      $log !
+    ]
+  ]
+]
+
+>
+? ?cap> $cap [
+
+
+]
+```
+
 ---
 
 ## mintsturdyref
diff --git a/assertions.prs b/assertions.prs
index 8e0f810..ea85365 100644
--- a/assertions.prs
+++ b/assertions.prs
@@ -8,3 +8,7 @@ FileSystemUsage = .
 Pulse = .
 
 XmlTranslation = .
+
+XsltTransform = .
+XsltItems = [XsltItem ...].
+XsltItem = string.
diff --git a/config.prs b/config.prs
index a90f3f8..8a3bdb8 100644
--- a/config.prs
+++ b/config.prs
@@ -51,5 +51,9 @@ XmlTranslatorArguments = .
 
+XsltArguments = .
+
 # Reused from syndicate-protocols/transportAddress
 Tcp = .
diff --git a/shell.nix b/shell.nix
index 2333a0b..d679438 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,7 +1,8 @@
 { pkgs ? import { } }:
 
 pkgs.buildNimPackage {
-  name = "dummy";
+  name = "syndicate_utils";
   propagatedNativeBuildInputs = [ pkgs.pkg-config ];
-  propagatedBuildInputs = [ pkgs.postgresql pkgs.sqlite ];
+  propagatedBuildInputs = [ pkgs.postgresql pkgs.sqlite pkgs.libxml2 pkgs.libxslt ];
+  lockFile = ./lock.json;
 }
diff --git a/src/schema/assertions.nim b/src/schema/assertions.nim
index 69723d1..1cec8b3 100644
--- a/src/schema/assertions.nim
+++ b/src/schema/assertions.nim
@@ -3,10 +3,12 @@ import
   preserves
 
 type
+  XsltItems* = seq[XsltItem]
   Pulse* {.preservesRecord: "pulse".} = object
     `periodSec`*: float
     `proxy`* {.preservesEmbedded.}: Value
 
+  XsltItem* = string
   XmlTranslation* {.preservesRecord: "xml-translation".} = object
     `xml`*: string
     `pr`*: Value
@@ -15,8 +17,15 @@ type
     `path`*: string
     `size`*: BiggestInt
 
-proc `$`*(x: Pulse | XmlTranslation | FileSystemUsage): string =
+  XsltTransform* {.preservesRecord: "xslt-transform".} = object
+    `stylesheet`*: string
+    `input`*: string
+    `output`*: Value
+
+proc `$`*(x: XsltItems | Pulse | XsltItem | XmlTranslation | FileSystemUsage |
+    XsltTransform): string =
   `$`(toPreserves(x))
 
-proc encode*(x: Pulse | XmlTranslation | FileSystemUsage): seq[byte] =
+proc encode*(x: XsltItems | Pulse | XsltItem | XmlTranslation | FileSystemUsage |
+    XsltTransform): seq[byte] =
   encode(toPreserves(x))
diff --git a/src/schema/config.nim b/src/schema/config.nim
index 2f3bb86..ca4650a 100644
--- a/src/schema/config.nim
+++ b/src/schema/config.nim
@@ -27,6 +27,12 @@ type
   JsonSocketTranslatorArguments* {.preservesRecord: "json-socket-translator".} = object
     `field0`*: JsonSocketTranslatorArgumentsField0
 
+  XsltArgumentsField0* {.preservesDictionary.} = object
+    `dataspace`* {.preservesEmbedded.}: EmbeddedRef
+
+  XsltArguments* {.preservesRecord: "xslt".} = object
+    `field0`*: XsltArgumentsField0
+
   WebhooksArgumentsField0* {.preservesDictionary.} = object
     `endpoints`*: Table[seq[string], EmbeddedRef]
     `listen`*: Tcp
@@ -84,6 +90,7 @@ type
 proc `$`*(x: WebsocketArguments | JsonTranslatorArguments |
     JsonTranslatorConnected |
     JsonSocketTranslatorArguments |
+    XsltArguments |
     WebhooksArguments |
     FileSystemUsageArguments |
     SqliteArguments |
@@ -98,6 +105,7 @@ proc `$`*(x: WebsocketArguments | JsonTranslatorArguments |
 proc encode*(x: WebsocketArguments | JsonTranslatorArguments |
     JsonTranslatorConnected |
     JsonSocketTranslatorArguments |
+    XsltArguments |
     WebhooksArguments |
     FileSystemUsageArguments |
     SqliteArguments |
diff --git a/src/syndesizer.nim b/src/syndesizer.nim
index 66b489e..fcf2f5e 100644
--- a/src/syndesizer.nim
+++ b/src/syndesizer.nim
@@ -17,7 +17,8 @@ import ./syndesizer/[
   pulses,
   webhooks,
   websockets,
-  xml_translator]
+  xml_translator,
+  xslt_actor]
 
 when withPostgre:
   import ./syndesizer/postgre_actor
@@ -36,6 +37,7 @@ runActor("syndesizer") do (turn: var Turn; root: Cap):
   discard spawnWebhookActor(turn, root)
   discard spawnWebsocketActor(turn, root)
   discard spawnXmlTranslator(turn, root)
+  discard spawnXsltActor(turn, root)
   when withPostgre:
     discard spawnPostgreActor(turn, root)
   when withSqlite:
diff --git a/src/syndesizer/xslt_actor.nim b/src/syndesizer/xslt_actor.nim
new file mode 100644
index 0000000..8f63b7f
--- /dev/null
+++ b/src/syndesizer/xslt_actor.nim
@@ -0,0 +1,256 @@
+# SPDX-FileCopyrightText: ☭ Emery Hemingway
+# SPDX-License-Identifier: Unlicense
+
+## Actor that applies XSLT stylesheets with libxslt and publishes the
+## transformed document to a dataspace as a Preserves value.
+
+import std/[os, strutils]
+import preserves, syndicate
+import ../schema/[assertions, config]
+
+{.passC: staticExec("pkg-config --cflags libxslt").}
+{.passL: staticExec("pkg-config --libs libxslt").}
+
+{.pragma: libxslt, header: "libxslt/xslt.h", importc.}
+
+# Partial bindings: only the fields read by this module are declared.
+type
+  xmlElementType {.libxslt.} = enum
+    XML_ELEMENT_NODE = 1,
+    XML_ATTRIBUTE_NODE = 2,
+    XML_TEXT_NODE = 3,
+    XML_CDATA_SECTION_NODE = 4,
+    XML_ENTITY_REF_NODE = 5,
+    XML_ENTITY_NODE = 6,
+    XML_PI_NODE = 7,
+    XML_COMMENT_NODE = 8,
+    XML_DOCUMENT_NODE = 9,
+    XML_DOCUMENT_TYPE_NODE = 10,
+    XML_DOCUMENT_FRAG_NODE = 11,
+    XML_NOTATION_NODE = 12,
+    XML_HTML_DOCUMENT_NODE = 13,
+    XML_DTD_NODE = 14,
+    XML_ELEMENT_DECL = 15,
+    XML_ATTRIBUTE_DECL = 16,
+    XML_ENTITY_DECL = 17,
+    XML_NAMESPACE_DECL = 18,
+    XML_XINCLUDE_START = 19,
+    XML_XINCLUDE_END = 20
+
+  xmlNsPtr = ptr xmlNs
+  xmlNs {.libxslt.} = object
+    next: xmlNsPtr
+    href, prefix: cstring
+
+  xmlAttrPtr = ptr xmlAttr
+  xmlAttr {.libxslt.} = object
+    name: cstring
+    next: xmlAttrPtr
+    children: xmlNodePtr
+
+  xmlElementContentPtr = ptr xmlElementContent
+  xmlElementContent {.libxslt.} = object
+    encoding: cstring
+
+  xmlNodePtr = ptr xmlNode
+  xmlNode {.libxslt.} = object
+    `type`: xmlElementType
+    name: cstring
+    children, next: xmlNodePtr
+    content: cstring
+    properties: xmlAttrPtr
+    nsDef: xmlNsPtr
+
+  xmlDocPtr {.libxslt.} = distinct pointer
+  xsltStylesheetPtr {.libxslt.} = distinct pointer
+
+proc isNil(x: xmlDocPtr): bool {.borrow.}
+proc isNil(x: xsltStylesheetPtr): bool {.borrow.}
+
+proc xmlReadMemory(buf: pointer; len: cint; url, enc: cstring; opts: cint): xmlDocPtr {.libxslt.}
+
+proc xmlReadMemory(buf: string; uri = "noname.xml"): xmlDocPtr =
+  ## Parse the XML text in `buf` as UTF-8, using `uri` as the document
+  ## name. Returns nil for empty or unparsable text.
+  if buf.len > 0:
+    # Indexing an empty string would raise, so the first byte is only
+    # addressed when it exists.
+    result = xmlReadMemory(buf[0].addr, buf.len.cint, uri, "UTF-8", 0)
+
+proc xmlParseFile(filename: cstring): xmlDocPtr {.libxslt.}
+
+proc xmlFreeDoc(p: xmlDocPtr) {.libxslt.}
+
+proc xmlDocGetRootElement(doc: xmlDocPtr): xmlNodePtr {.libxslt.}
+
+proc loadXmlDoc(text: string): xmlDocPtr =
+  ## Load a document from `text`, which is read as a file when it starts
+  ## with "/" and names an existing file, and parsed as XML text otherwise.
+  if text.startsWith("/") and fileExists(text):
+    xmlParseFile(text)
+  else:
+    xmlReadMemory(text, "noname.xml")
+
+proc xsltParseStylesheetFile(filename: cstring): xsltStylesheetPtr {.libxslt.}
+
+proc xsltParseStylesheetDoc(doc: xmlDocPtr): xsltStylesheetPtr {.libxslt.}
+
+proc xsltParseStylesheetDoc(text: string; uri = "noname.xml"): xsltStylesheetPtr =
+  ## Compile the stylesheet held in `text`. Returns nil when the text
+  ## cannot be parsed as XML or compiled as a stylesheet.
+  let doc = xmlReadMemory(text, uri)
+  if not doc.isNil:
+    result = xsltParseStylesheetDoc(doc)
+    if result.isNil:
+      # libxslt takes ownership of `doc` only when compilation succeeds.
+      xmlFreeDoc(doc)
+
+proc loadStylesheet(text: string): xsltStylesheetPtr =
+  ## Load a stylesheet from a file path or from XML text, choosing
+  ## between the two with the same rule as `loadXmlDoc`.
+  if text.startsWith("/") and fileExists(text):
+    xsltParseStylesheetFile(text)
+  else:
+    xsltParseStylesheetDoc(text, "noname.xsl")
+
+proc xsltApplyStylesheet(
+    style: xsltStylesheetPtr, doc: xmlDocPtr, params: cstringArray): xmlDocPtr {.libxslt.}
+
+proc xsltFreeStylesheet(style: xsltStylesheetPtr) {.libxslt.}
+
+proc xsltSaveResultToString(txt: ptr pointer; len: ptr cint; res: xmlDocPtr; style: xsltStylesheetPtr): cint {.libxslt.}
+
+proc c_free*(p: pointer) {.importc: "free", header: "<stdlib.h>".}
+
+proc xsltSaveResultToString(res: xmlDocPtr; style: xsltStylesheetPtr): string =
+  ## Serialize the transformation result `res` to a string using the
+  ## output settings of `style`. Raises `CatchableError` on failure.
+  var
+    txt: pointer
+    len: cint
+  if xsltSaveResultToString(addr txt, addr len, res, style) < 0:
+    raise newException(CatchableError, "xsltSaveResultToString failed")
+  if not txt.isNil:
+    if len > 0:
+      result = newString(int len)
+      copyMem(result[0].addr, txt, len)
+    # Release the C buffer even when it holds no bytes.
+    c_free(txt)
+
+proc initLibXml =
+  ## Placeholder for library initialisation; currently does nothing.
+  discard
+
+proc XML_GET_CONTENT(xn: xmlNodePtr): xmlElementContentPtr {.libxslt.}
+
+proc textContent(xn: xmlNodePtr): string =
+  ## Text held directly by `xn`, or "" when the node has none.
+  if xn.content != nil: result = $xn.content
+
+proc content(attr: xmlAttrPtr): string =
+  ## Attribute value: the concatenated text of the attribute's children.
+  var child = attr.children
+  while not child.isNil:
+    if not child.content.isNil:
+      result.add child.content
+    child = child.next
+
+proc preserveSiblings(result: var seq[Value]; first: xmlNodePtr) =
+  ## Append a Preserves rendering of `first` and each following sibling
+  ## to `result`. An element becomes a record holding, in order: a
+  ## dictionary of its namespace declarations and attributes (only when it
+  ## has any), its converted child nodes, and last its name as a symbol.
+  ## Text and CDATA nodes become strings; other node kinds are reported.
+  var xn = first
+  while not xn.isNil:
+    case xn.type
+    of XML_ELEMENT_NODE:
+      var child = Value(kind: pkRecord)
+      if not xn.nsDef.isNil:
+        child.record.add initDictionary()
+        var ns = xn.nsDef
+        while not ns.isNil:
+          if not ns.href.isNil:
+            var key = Value(kind: pkString)
+            if ns.prefix.isNil:
+              key.string = "xmlns"
+            else:
+              key.string = "xmlns:" & $ns.prefix
+            child.record[0][key] = toPreserves($ns.href)
+          ns = ns.next
+
+      if not xn.properties.isNil:
+        if child.record.len < 1:
+          child.record.add initDictionary()
+        var attr = xn.properties
+        while not attr.isNil:
+          var
+            key = toPreserves($attr.name)
+            val = toPreserves(attr.content)
+          child.record[0][key] = val
+          attr = attr.next
+      if not xn.children.isNil:
+        preserveSiblings(child.record, xn.children)
+      # The element name is appended after the fields.
+      child.record.add toSymbol($xn.name)
+      result.add child
+    of XML_TEXT_NODE, XML_CDATA_SECTION_NODE:
+      result.add textContent(xn).toPreserves
+    else:
+      stderr.writeLine "not an XML_ELEMENT_NODE - ", $xn.type
+    xn = xn.next
+
+proc toPreservesHook*(xn: xmlNodePtr): Value =
+  ## Convert `xn`, or failing that its first following element or text
+  ## sibling, to a Preserves value. Raises `ValueError` when none exists.
+  var items = newSeqOfCap[Value](1)
+  preserveSiblings(items, xn)
+  if items.len == 0:
+    raise newException(ValueError, "no XML element or text node to convert")
+  items[0]
+
+proc spawnXsltActor*(turn: var Turn; root: Cap): Actor {.discardable.} =
+  ## Spawn the "xslt" actor. For every `XsltArguments` asserted at `root`
+  ## it watches the given dataspace for observers of `XsltTransform` and
+  ## publishes an `XsltTransform` assertion carrying the converted result
+  ## for each stylesheet and input pair they ask about.
+  spawn("xslt", turn) do (turn: var Turn):
+    initLibXml()
+    during(turn, root, ?:XsltArguments) do (ds: Cap):
+      let sheetsPat = ?Observe(pattern: !XsltTransform) ?? {0: grab(), 1: grab()}
+      during(turn, ds, sheetsPat) do (stylesheet: Literal[string], input: Literal[string]):
+        let cur = loadStylesheet(stylesheet.value)
+        if cur.isNil:
+          stderr.writeLine "failed to parse stylesheet"
+        else:
+          let doc = loadXmlDoc(input.value)
+          if doc.isNil:
+            stderr.writeLine "failed to parse input document"
+          else:
+            let params = allocCStringArray([])
+            let res = xsltApplyStylesheet(cur, doc, params)
+            # The parameter array is only needed for the call above, so it
+            # is released whether or not the transformation succeeded.
+            deallocCStringArray(params)
+            if res.isNil:
+              stderr.writeLine "failed to apply stylesheet transformation"
+            else:
+              let rootNode = xmlDocGetRootElement(res)
+              if rootNode.isNil:
+                # For example, a stylesheet that emits plain text.
+                stderr.writeLine "transformation result has no root element"
+              else:
+                publish(turn, ds, XsltTransform(
+                    stylesheet: stylesheet.value,
+                    input: input.value,
+                    output: rootNode.toPreservesHook,
+                  ))
+              xmlFreeDoc(res)
+            xmlFreeDoc(doc)
+          xsltFreeStylesheet(cur)
+
+when isMainModule:
+  import syndicate/relays
+  runActor("main") do (turn: var Turn; root: Cap):
+    connectStdio(turn, root)
+    spawnXsltActor(turn, root)
diff --git a/syndicate_utils.nimble b/syndicate_utils.nimble
index 8a1f744..bfc63b0 100644
--- a/syndicate_utils.nimble
+++ b/syndicate_utils.nimble
@@ -1,6 +1,6 @@
 # Package
 
-version = "20240208"
+version = "20240209"
 author = "Emery Hemingway"
 description = "Utilites for Syndicated Actors and Synit"
 license = "unlicense"