Querying and document retrieval

This commit is contained in:
Emery Hemingway 2023-06-10 16:59:50 +01:00
parent 7d532d71dd
commit bf722fa5b8
5 changed files with 58 additions and 18 deletions

View File

@ -2,6 +2,9 @@ version 1 .
DatabaseInfo = <xapian @uuid string @path string> .
Document = <document @id int @data bytes> .
Document = <document @id int @data string> .
Query = <query @label any @expression string @lang symbol> .
Match = <match @label any @docid int @weight float @rank int> .
ValueSlots = {int: any ...:...} .

View File

@ -6,8 +6,6 @@ import ./xapian_actor/[protocol, xapian]
type
Value = Preserve[void]
type
DatabaseArg {.preservesDictionary.} = object
database: string
DataspaceArg {.preservesDictionary.} = object
@ -15,11 +13,37 @@ type
# Actor entry point. For every `DatabaseArg` asserted at `root` a Xapian
# database is opened from the given path; for every `DataspaceArg` the
# database is announced in that dataspace, and document observations and
# queries arriving there are answered from the database.
runActor("main") do (root: Ref; turn: var Turn):
  connectStdio(root, turn)
  during(turn, root, ?DatabaseArg) do (path: string):
    let db = initDatabase(path)
    during(turn, root, ?DataspaceArg) do (ds: Ref):
      # Announce which database is being served in this dataspace.
      discard publish(turn, ds, DatabaseInfo(uuid: db.uuid, path: path))
      # React to observers of `Document`, capturing slot 0 of the observed
      # pattern as `id`, and publish that document's data for them.
      # NOTE(review): presumably only observers that fix the id to a literal
      # match here (`grabLit`) - confirm against the syndicate pattern docs.
      during(turn, ds, ?Observe(pattern: !protocol.Document) ?? {0: grabLit()}) do (id: BiggestInt):
        discard publish(turn, ds, protocol.Document(
            id: id,
            data: $db.getDocument(DocId id).data,
          ))
      # Answer each asserted `Query` with one `Match` per result, tagged with
      # the query's own `label`.
      during(turn, ds, ?protocol.Query) do (label: Value, expr: string, lang: Symbol):
        let enq = initEnquire(db)
        block:
          # The parser and stemmer are only needed while building the query.
          let qp = initQueryParser()
          let stem = initStem(string lang)
          qp.setStemmer(stem)
          qp.setStemmingStrategy(STEM_SOME)
          qp.setDatabase(db)
          enq.setQuery(qp.parseQuery(expr))
        block:
          # Ask for at most as many matches as the database has documents.
          let matches = enq.getMset(0, db.getDocCount)
          for e in matches:
            discard publish(turn, ds, Match(
                label: label,
                docid: BiggestInt e.docid,
                weight: e.weight,
                rank: BiggestInt e.rank,
              ))
  do:
    # close database on path retraction
    close(db)

View File

@ -5,15 +5,26 @@ import
type
Document* {.preservesRecord: "document".} = object
`id`*: BiggestInt
`data`*: seq[byte]
`data`*: string
Match* {.preservesRecord: "match".} = object
`label`*: Preserve[void]
`docid`*: BiggestInt
`weight`*: float32
`rank`*: BiggestInt
ValueSlots* = Table[BiggestInt, Preserve[void]]
Query* {.preservesRecord: "query".} = object
`label`*: Preserve[void]
`expression`*: string
`lang`*: Symbol
DatabaseInfo* {.preservesRecord: "xapian".} = object
`uuid`*: string
`path`*: string
proc `$`*(x: Document | ValueSlots | DatabaseInfo): string =
proc `$`*(x: Document | Match | ValueSlots | Query | DatabaseInfo): string =
`$`(toPreserve(x))
proc encode*(x: Document | ValueSlots | DatabaseInfo): seq[byte] =
proc encode*(x: Document | Match | ValueSlots | Query | DatabaseInfo): seq[byte] =
encode(toPreserve(x))

View File

@ -219,20 +219,19 @@ type MSetItem* = object
docid*: DocId
weight*: float64
rank*: DocCount
percent*: int
collapseKey*: string
collapseCount*: DocCount
# collapse_key*: string
# collapse_count*: DocCount
# Capture the match `iter` currently points at as an MSetItem, keeping the
# owning `mset` and its `get_firstitem()` value alongside the entry's docid,
# weight and rank.
# NOTE(review): two constructor argument lists follow - one that also fills
# `percent` and the collapse fields, and one with the collapse fields
# commented out. This looks like the before/after of a change shown together;
# confirm which one is live before editing.
proc initMSetItem(iter: MSetIterator; mset: MSet): MSetItem =
result = MSetItem(
mset: mset,
firstitem: mset.get_firstitem(),
docid: iter.get_docid(),
weight: iter.get_weight(),
rank: iter.get_rank(),
percent: iter.get_percent(),
collapse_key: $iter.get_collapse_key(),
collapse_count: iter.get_collapse_count())
mset: mset,
firstitem: mset.get_firstitem(),
docid: iter.get_docid(),
weight: iter.get_weight(),
rank: iter.get_rank(),
# collapse_key: $iter.get_collapse_key(),
# collapse_count: iter.get_collapse_count(),
)
proc document*(item: MSetItem): Document =
  ## Fetch the document behind `item` from the MSet it was taken from.
  ##
  ## `item.rank` is the entry's rank in the overall result list, whereas
  ## indexing an MSet counts from that MSet's first entry (whose rank is
  ## `get_firstitem()`). The first-item offset recorded when the item was
  ## built is therefore subtracted to get the position inside `item.mset`;
  ## without it the lookup is only right for MSets that start at offset 0.
  # NOTE(review): assumes `rank` and `firstitem` share an integer type that
  # supports `-` and is accepted by `[]` on MSet - confirm against the type
  # declarations, which are outside this view.
  item.mset[item.rank - item.firstitem].get_document()
@ -249,6 +248,9 @@ iterator items*(mset: MSet): MSetItem =
# QueryParser
# Construct a QueryParser by calling the C++ default constructor
# `Xapian::QueryParser()`; takes no arguments.
proc initQueryParser*(): QueryParser {.
constructor, importcpp: "Xapian::QueryParser()".}
# Nim-side view of the C++ enum named in the importcpp pragma. The members
# keep their original spelling and order.
# NOTE(review): the order presumably has to mirror the C++ declaration so the
# ordinal values line up - confirm against the Xapian header before reordering.
type
  StemStrategy* {.importcpp: "Xapian::QueryParser::stem_strategy".} = enum
    STEM_NONE,
    STEM_SOME,
    STEM_ALL,
    STEM_ALL_Z,
    STEM_SOME_FULL_POS

View File

@ -4,6 +4,6 @@ bin = @["xapian_actor"]
description = "Syndicate actor for accessing Xapian databases"
license = "Unlicense"
srcDir = "src"
version = "20220421"
version = "20230610"
requires "nim >= 1.6.4", "syndicate >= 20230609"