Add XSLT processor

This commit is contained in:
Emery Hemingway 2024-02-09 15:24:45 +00:00
parent 1827c91da0
commit 4f2e19b0b2
9 changed files with 267 additions and 6 deletions

View File

@ -270,6 +270,28 @@ Examples:
]
```
### XSLT processor
Perform XML stylesheet transformations. Given a textual XSLT stylesheet and a textual XML document, generate an abstract XML document in Preserves form. Each input may be either inline XML text or an absolute path (starting with `/`) to an existing XML file.
```
# Configuration example
let ?ds = dataspace
$ds [
? <xslt-transform "/stylesheet.xsl" "/doc.xml" ?output> [
? <xml-translation ?text $output> [
$log ! <log "-" { xslt-output: $text }>
]
]
]
<require-service <daemon syndesizer>>
? <service-object <daemon syndesizer> ?cap> $cap [
<xml-translator { dataspace: $ds }>
<xslt { dataspace: $ds }>
]
```
---
## mintsturdyref

View File

@ -8,3 +8,7 @@ FileSystemUsage = <file-system-usage @path string @size int>.
Pulse = <pulse @periodSec float @proxy #:any>.
XmlTranslation = <xml-translation @xml string @pr any>.
XsltTransform = <xslt-transform @stylesheet string @input string @output any>.
XsltItems = [XsltItem ...].
XsltItem = string.

View File

@ -51,5 +51,9 @@ XmlTranslatorArguments = <xml-translator {
dataspace: #:any
}>.
XsltArguments = <xslt {
dataspace: #:any
}>.
# Reused from syndicate-protocols/transportAddress
Tcp = <tcp @host string @port int>.

View File

@ -1,7 +1,8 @@
{ pkgs ? import <nixpkgs> { } }:
pkgs.buildNimPackage {
name = "dummy";
name = "syndicate_utils";
propagatedNativeBuildInputs = [ pkgs.pkg-config ];
propagatedBuildInputs = [ pkgs.postgresql pkgs.sqlite ];
propagatedBuildInputs = [ pkgs.postgresql pkgs.sqlite pkgs.libxml2 pkgs.libxslt ];
lockFile = ./lock.json;
}

View File

@ -3,10 +3,12 @@ import
preserves
type
XsltItems* = seq[XsltItem]
Pulse* {.preservesRecord: "pulse".} = object
`periodSec`*: float
`proxy`* {.preservesEmbedded.}: Value
XsltItem* = string
XmlTranslation* {.preservesRecord: "xml-translation".} = object
`xml`*: string
`pr`*: Value
@ -15,8 +17,15 @@ type
`path`*: string
`size`*: BiggestInt
proc `$`*(x: Pulse | XmlTranslation | FileSystemUsage): string =
XsltTransform* {.preservesRecord: "xslt-transform".} = object
`stylesheet`*: string
`input`*: string
`output`*: Value
proc `$`*(x: XsltItems | Pulse | XsltItem | XmlTranslation | FileSystemUsage |
XsltTransform): string =
`$`(toPreserves(x))
proc encode*(x: Pulse | XmlTranslation | FileSystemUsage): seq[byte] =
proc encode*(x: XsltItems | Pulse | XsltItem | XmlTranslation | FileSystemUsage |
XsltTransform): seq[byte] =
encode(toPreserves(x))

View File

@ -27,6 +27,12 @@ type
JsonSocketTranslatorArguments* {.preservesRecord: "json-socket-translator".} = object
`field0`*: JsonSocketTranslatorArgumentsField0
XsltArgumentsField0* {.preservesDictionary.} = object
`dataspace`* {.preservesEmbedded.}: EmbeddedRef
XsltArguments* {.preservesRecord: "xslt".} = object
`field0`*: XsltArgumentsField0
WebhooksArgumentsField0* {.preservesDictionary.} = object
`endpoints`*: Table[seq[string], EmbeddedRef]
`listen`*: Tcp
@ -84,6 +90,7 @@ type
proc `$`*(x: WebsocketArguments | JsonTranslatorArguments |
JsonTranslatorConnected |
JsonSocketTranslatorArguments |
XsltArguments |
WebhooksArguments |
FileSystemUsageArguments |
SqliteArguments |
@ -98,6 +105,7 @@ proc `$`*(x: WebsocketArguments | JsonTranslatorArguments |
proc encode*(x: WebsocketArguments | JsonTranslatorArguments |
JsonTranslatorConnected |
JsonSocketTranslatorArguments |
XsltArguments |
WebhooksArguments |
FileSystemUsageArguments |
SqliteArguments |

View File

@ -17,7 +17,8 @@ import ./syndesizer/[
pulses,
webhooks,
websockets,
xml_translator]
xml_translator,
xslt_actor]
when withPostgre:
import ./syndesizer/postgre_actor
@ -36,6 +37,7 @@ runActor("syndesizer") do (turn: var Turn; root: Cap):
discard spawnWebhookActor(turn, root)
discard spawnWebsocketActor(turn, root)
discard spawnXmlTranslator(turn, root)
discard spawnXsltActor(turn, root)
when withPostgre:
discard spawnPostgreActor(turn, root)
when withSqlite:

View File

@ -0,0 +1,211 @@
# SPDX-FileCopyrightText: ☭ Emery Hemingway
# SPDX-License-Identifier: Unlicense
import std/[os, strutils]
import preserves, syndicate
import ../schema/[assertions, config]
# Ask pkg-config, at compile time, for the C compiler and linker flags of
# libxslt and pass them on to the backend compiler.
{.passC: staticExec("pkg-config --cflags libxslt").}
{.passL: staticExec("pkg-config --libs libxslt").}
# Shorthand pragma: import a C symbol under its own name from "libxslt/xslt.h".
# NOTE(review): assumes this single header transitively declares every libxml2
# and libxslt symbol tagged with `libxslt` in this module - TODO confirm.
{.pragma: libxslt, header: "libxslt/xslt.h", importc.}
type
  # Node kind tag read from `xmlNode.type`. Only XML_ELEMENT_NODE and
  # XML_TEXT_NODE are converted by `preserveSiblings`; every other kind is
  # reported on stderr and skipped.
  xmlElementType {.libxslt.} = enum
    XML_ELEMENT_NODE = 1,
    XML_ATTRIBUTE_NODE = 2,
    XML_TEXT_NODE = 3,
    XML_CDATA_SECTION_NODE = 4,
    XML_ENTITY_REF_NODE = 5,
    XML_ENTITY_NODE = 6,
    XML_PI_NODE = 7,
    XML_COMMENT_NODE = 8,
    XML_DOCUMENT_NODE = 9,
    XML_DOCUMENT_TYPE_NODE = 10,
    XML_DOCUMENT_FRAG_NODE = 11,
    XML_NOTATION_NODE = 12,
    XML_HTML_DOCUMENT_NODE = 13,
    XML_DTD_NODE = 14,
    XML_ELEMENT_DECL = 15,
    XML_ATTRIBUTE_DECL = 16,
    XML_ENTITY_DECL = 17,
    XML_NAMESPACE_DECL = 18,
    XML_XINCLUDE_START = 19,
    XML_XINCLUDE_END = 20
  # Namespace declaration; declarations on one element are chained via `next`.
  xmlNsPtr = ptr xmlNs
  xmlNs {.libxslt.} = object
    next: xmlNsPtr
    href, prefix: cstring
  # Attribute of an element; attributes are chained via `next` and the value
  # is held by the nodes reachable from `children`.
  xmlAttrPtr = ptr xmlAttr
  xmlAttr {.libxslt.} = object
    name: cstring
    next: xmlAttrPtr
    children: xmlNodePtr
  # Only referenced as the declared result type of `XML_GET_CONTENT`.
  # NOTE(review): the `encoding` field should be checked against the libxml2
  # definition of xmlElementContent - TODO confirm.
  xmlElementContentPtr = ptr xmlElementContent
  xmlElementContent {.libxslt.} = object
    encoding: cstring
  # Partial view of a libxml2 tree node: just the fields this module reads.
  xmlNodePtr = ptr xmlNode
  xmlNode {.libxslt.} = object
    `type`: xmlElementType
    name: cstring
    children, next: xmlNodePtr
    content: cstring
    properties: xmlAttrPtr
    nsDef: xmlNsPtr
  # Opaque handles; distinct so a document cannot be passed where a
  # stylesheet is expected and vice versa.
  xmlDocPtr {.libxslt.} = distinct pointer
  xsltStylesheetPtr {.libxslt.} = distinct pointer
# The handle types are `distinct pointer`, so the nil test has to be borrowed
# explicitly from `pointer`.
proc isNil(x: xmlDocPtr): bool {.borrow.}
proc isNil(x: xsltStylesheetPtr): bool {.borrow.}
# Raw binding: parse an XML document held in a memory buffer of `len` bytes.
proc xmlReadMemory(buf: pointer; len: cint; url, enc: cstring; opts: cint): xmlDocPtr {.libxslt.}

proc xmlReadMemory(buf: string; uri = "noname.xml"): xmlDocPtr =
  ## Parse the XML text in `buf` as UTF-8 with default parser options.
  ##
  ## `uri` is handed to the parser as the document URL.
  ## Returns a nil handle when parsing fails. An empty `buf` also yields nil
  ## instead of raising `IndexDefect` from taking the address of `buf[0]`.
  if buf.len > 0:
    result = xmlReadMemory(buf[0].addr, buf.len.cint, uri, "UTF-8", 0)
# Raw binding: parse the XML file at `filename`; callers in this module treat
# a nil result as a parse failure.
proc xmlParseFile(filename: cstring): xmlDocPtr {.libxslt.}
# Raw binding: release a document handle.
proc xmlFreeDoc(p: xmlDocPtr) {.libxslt.}
# Raw binding: fetch the root element node of `doc`.
# NOTE(review): presumably nil when the document has no root element - confirm
# against the libxml2 documentation.
proc xmlDocGetRootElement(doc: xmlDocPtr): xmlNodePtr {.libxslt.}
proc loadXmlDoc(text: string): xmlDocPtr =
  ## Load an XML document from `text`.
  ##
  ## When `text` starts with "/" and names an existing file it is parsed as a
  ## file path; anything else is parsed as inline XML text.
  let namesExistingFile = text.startsWith("/") and fileExists(text)
  result =
    if namesExistingFile: xmlParseFile(text)
    else: xmlReadMemory(text, "noname.xml")
# Raw binding: load and compile the stylesheet stored in the file `filename`.
proc xsltParseStylesheetFile(filename: cstring): xsltStylesheetPtr {.libxslt.}
# Raw binding: compile an already parsed document into a stylesheet.
# NOTE(review): the wrapper below relies on the stylesheet taking ownership of
# `doc` on success - confirm against the libxslt documentation.
proc xsltParseStylesheetDoc(doc: xmlDocPtr): xsltStylesheetPtr {.libxslt.}
proc xsltParseStylesheetDoc(text: string; uri = "noname.xml"): xsltStylesheetPtr =
  ## Compile the XSLT stylesheet given as XML text.
  ##
  ## `uri` is used as the URL of the intermediate document.
  ## Returns a nil handle when `text` is not well-formed XML or is not a valid
  ## stylesheet.
  let doc = xmlReadMemory(text, uri)
  if doc.isNil:
    return # nothing was parsed; `result` stays nil
  result = xsltParseStylesheetDoc(doc)
  if result.isNil:
    # libxslt only takes ownership of the document when compilation succeeds,
    # so a rejected document has to be released here or it leaks.
    xmlFreeDoc(doc)
  # On success the document is freed together with the stylesheet.
proc loadStylesheet(text: string): xsltStylesheetPtr =
  ## Load an XSLT stylesheet from `text`.
  ##
  ## When `text` starts with "/" and names an existing file it is loaded as a
  ## stylesheet file; anything else is compiled as inline stylesheet text.
  let namesExistingFile = text.startsWith("/") and fileExists(text)
  result =
    if namesExistingFile: xsltParseStylesheetFile(text)
    else: xsltParseStylesheetDoc(text, "noname.xsl")
# Raw binding: apply `style` to `doc` and return the result document;
# the caller in this module treats a nil result as a failed transformation.
# NOTE(review): `params` is presumably a NULL-terminated array of
# name/value strings - confirm against the libxslt documentation.
proc xsltApplyStylesheet(
    style: xsltStylesheetPtr, doc: xmlDocPtr, params: cstringArray): xmlDocPtr {.libxslt.}
# Raw binding: release a compiled stylesheet.
proc xsltFreeStylesheet(style: xsltStylesheetPtr) {.libxslt.}
# Raw binding: serialise `res` into a C-allocated buffer returned through
# `txt`/`len`; the wrapper below treats a negative return value as failure.
proc xsltSaveResultToString(txt: ptr pointer; len: ptr cint; res: xmlDocPtr; style: xsltStylesheetPtr): cint {.libxslt.}
# C `free` from <stdlib.h>, used to release the buffer produced above.
proc c_free*(p: pointer) {.importc: "free", header: "<stdlib.h>".}
proc xsltSaveResultToString(res: xmlDocPtr; style: xsltStylesheetPtr): string =
  ## Serialise the transformation result `res` to a Nim string using the
  ## output settings of `style`.
  ##
  ## Raises `CatchableError` when the underlying C call reports failure.
  var
    buffer: pointer
    size: cint
  let status = xsltSaveResultToString(addr buffer, addr size, res, style)
  if status < 0:
    raise newException(CatchableError, "xsltSaveResultToString failed")
  if size > 0:
    # Copy the bytes into managed memory before the C buffer is released.
    result = newString(int size)
    copyMem(addr result[0], buffer, size)
  c_free(buffer)
# Placeholder for library initialisation; currently does nothing.
proc initLibXml =
  discard
# Binding for the C symbol XML_GET_CONTENT; not called anywhere in the code
# visible in this module.
# NOTE(review): the declared result type should be checked against the
# libxml2 headers - TODO confirm.
proc XML_GET_CONTENT(xn: xmlNodePtr): xmlElementContentPtr {.libxslt.}
proc textContent(xn: xmlNodePtr): string =
  ## Return the `content` of node `xn` as a Nim string, or the empty string
  ## when the node carries no content.
  result =
    if xn.content.isNil: ""
    else: $xn.content
proc content(attr: xmlAttrPtr): string =
  ## Concatenate the `content` of every child node of attribute `attr`, in
  ## sibling order, to form the attribute value.
  var node = attr.children
  while node != nil:
    result.add node.content
    node = node.next
proc preserveSiblings(result: var seq[Value]; first: xmlNodePtr) =
  ## Convert `first` and all of its following siblings to Preserves values and
  ## append them to `result`.
  ##
  ## Elements become records: an optional leading dictionary holding namespace
  ## declarations and attributes, then the converted children, with the element
  ## name added last as a symbol. Text nodes become strings. Any other node
  ## kind is reported on stderr and skipped.
  var node = first
  while node != nil:
    case node.type
    of XML_ELEMENT_NODE:
      var element = Value(kind: pkRecord)
      if node.nsDef != nil:
        # Namespace declarations are stored under "xmlns" / "xmlns:<prefix>".
        element.record.add initDictionary()
        var decl = node.nsDef
        while decl != nil:
          if not decl.href.isNil:
            var key = Value(kind: pkString)
            key.string =
              if decl.prefix.isNil: "xmlns"
              else: "xmlns:" & $decl.prefix
            element.record[0][key] = toPreserves($decl.href)
          decl = decl.next
      if node.properties != nil:
        # Reuse the dictionary created for namespaces, or create it now.
        if element.record.len == 0:
          element.record.add initDictionary()
        var attribute = node.properties
        while attribute != nil:
          let
            attrName = toPreserves($attribute.name)
            attrValue = toPreserves(attribute.content)
          element.record[0][attrName] = attrValue
          attribute = attribute.next
      if node.children != nil:
        preserveSiblings(element.record, node.children)
      element.record.add toSymbol($node.name)
      result.add element
    of XML_TEXT_NODE:
      result.add toPreserves(textContent(node))
    else:
      stderr.writeLine "not an XML_ELEMENT_NODE - ", $node.type
    node = node.next
proc toPreservesHook*(xn: xmlNodePtr): Value =
  ## Convert the XML node `xn` to its Preserves form.
  ##
  ## Returns the value produced for `xn` itself. When `xn` is nil, or neither
  ## it nor any of its following siblings is an element or text node, an empty
  ## Preserves sequence is returned instead of raising `IndexDefect` on an
  ## empty conversion result.
  var items = newSeqOfCap[Value](1)
  preserveSiblings(items, xn)
  result =
    if items.len > 0: items[0]
    else: Value(kind: pkSequence)
proc spawnXsltActor*(turn: var Turn; root: Cap): Actor {.discardable.} =
  ## Spawn the "xslt" actor.
  ##
  ## For every `XsltArguments` assertion on `root` the actor watches the given
  ## dataspace for observers of `XsltTransform`. For each observed
  ## stylesheet/input pair it runs the transformation and publishes an
  ## `XsltTransform` whose `output` is the Preserves form of the result's root
  ## element. Failures are reported on stderr and nothing is published.
  spawn("xslt", turn) do (turn: var Turn):
    initLibXml()
    during(turn, root, ?:XsltArguments) do (ds: Cap):
      let sheetsPat = ?Observe(pattern: !XsltTransform) ?? {0: grab(), 1: grab()}
      during(turn, ds, sheetsPat) do (stylesheet: Literal[string], input: Literal[string]):
        let cur = loadStylesheet(stylesheet.value)
        if cur.isNil:
          stderr.writeLine "failed to parse stylesheet"
        else:
          var doc, res: xmlDocPtr
          try:
            doc = loadXmlDoc(input.value)
            if doc.isNil:
              stderr.writeLine "failed to parse input document"
            else:
              let params = allocCStringArray([])
              res = xsltApplyStylesheet(cur, doc, params)
              # The parameter array is only needed for the call itself, so it
              # is released whether or not the transformation succeeded.
              deallocCStringArray(params)
              if res.isNil:
                stderr.writeLine "failed to apply stylesheet transformation"
              else:
                let rootNode = xmlDocGetRootElement(res)
                if rootNode.isNil:
                  # A result without a root element has nothing to convert.
                  stderr.writeLine "stylesheet transformation produced no root element"
                else:
                  publish(turn, ds, XsltTransform(
                      stylesheet: stylesheet.value,
                      input: input.value,
                      output: rootNode.toPreservesHook,
                    ))
          finally:
            # Release the C resources on every path, including when
            # publishing raises.
            if not res.isNil: xmlFreeDoc(res)
            if not doc.isNil: xmlFreeDoc(doc)
            xsltFreeStylesheet(cur)
# When this module is compiled as the main program, run an actor named "main"
# that calls connectStdio on the root capability and then spawns the XSLT
# actor on that same capability.
when isMainModule:
  import syndicate/relays
  runActor("main") do (turn: var Turn; root: Cap):
    connectStdio(turn, root)
    spawnXsltActor(turn, root)

View File

@ -1,6 +1,6 @@
# Package
version = "20240208"
version = "20240209"
author = "Emery Hemingway"
description = "Utilites for Syndicated Actors and Synit"
license = "unlicense"