Querying and document retrieval
This commit is contained in:
parent
7d532d71dd
commit
bf722fa5b8
|
@ -2,6 +2,9 @@ version 1 .
|
|||
|
||||
DatabaseInfo = <xapian @uuid string @path string> .
|
||||
|
||||
Document = <document @id int @data bytes> .
|
||||
Document = <document @id int @data string> .
|
||||
|
||||
Query = <query @label any @expression string @lang symbol> .
|
||||
Match = <match @label any @docid int @weight float @rank int> .
|
||||
|
||||
ValueSlots = {int: any ...:...} .
|
||||
|
|
|
@ -6,8 +6,6 @@ import ./xapian_actor/[protocol, xapian]
|
|||
|
||||
type
|
||||
Value = Preserve[void]
|
||||
|
||||
type
|
||||
DatabaseArg {.preservesDictionary.} = object
|
||||
database: string
|
||||
DataspaceArg {.preservesDictionary.} = object
|
||||
|
@ -15,11 +13,37 @@ type
|
|||
|
||||
# Actor entry point. While a DatabaseArg is asserted on `root`, a database is
# opened from its path; while a DataspaceArg is asserted, the actor publishes
# database information, documents and query matches into that dataspace.
runActor("main") do (root: Ref; turn: var Turn):
  connectStdio(root, turn)

  during(turn, root, ?DatabaseArg) do (path: string):
    let db = initDatabase(path)

    during(turn, root, ?DataspaceArg) do (ds: Ref):
      # Announce which database is being served.
      discard publish(turn, ds, DatabaseInfo(uuid: db.uuid, path: path))

      # React to observers of Document records; the value captured from
      # slot 0 of the observed pattern is used as the document id.
      during(turn, ds, ?Observe(pattern: !protocol.Document) ?? {0: grabLit()}) do (id: BiggestInt):
        let payload = $db.getDocument(DocId id).data
        discard publish(turn, ds, protocol.Document(id: id, data: payload))

      # Run each asserted Query and publish one Match per result.
      during(turn, ds, ?protocol.Query) do (label: Value, expr: string, lang: Symbol):
        let enquire = initEnquire(db)
        block:
          # Parser and stemmer are only needed while building the query.
          let parser = initQueryParser()
          let stemmer = initStem(string lang)
          parser.setStemmer(stemmer)
          parser.setStemmingStrategy(STEM_SOME)
          parser.setDatabase(db)
          enquire.setQuery(parser.parseQuery(expr))
        block:
          # Request up to as many matches as the database has documents.
          let hits = enquire.getMset(0, db.getDocCount)
          for hit in hits:
            discard publish(turn, ds, Match(
              label: label,
              docid: BiggestInt hit.docid,
              weight: hit.weight,
              rank: BiggestInt hit.rank,
            ))
  do:
    # close database on path retraction
    close(db)
|
||||
|
|
|
@ -5,15 +5,26 @@ import
|
|||
type
  Document* {.preservesRecord: "document".} = object
    ## Record `<document @id int @data string>`.
    id*: BiggestInt
    data*: string

  Match* {.preservesRecord: "match".} = object
    ## Record `<match @label any @docid int @weight float @rank int>`.
    label*: Preserve[void]
    docid*: BiggestInt
    weight*: float32
    rank*: BiggestInt

  ValueSlots* = Table[BiggestInt, Preserve[void]]
    ## Dictionary `{int: any ...:...}`.

  Query* {.preservesRecord: "query".} = object
    ## Record `<query @label any @expression string @lang symbol>`.
    label*: Preserve[void]
    expression*: string
    lang*: Symbol

  DatabaseInfo* {.preservesRecord: "xapian".} = object
    ## Record `<xapian @uuid string @path string>`.
    uuid*: string
    path*: string
|
||||
|
||||
proc `$`*(x: Document | Match | ValueSlots | Query | DatabaseInfo): string =
  ## Convert `x` with `toPreserve` and return the textual form of that value.
  let pr = toPreserve(x)
  result = $pr
|
||||
|
||||
proc encode*(x: Document | Match | ValueSlots | Query | DatabaseInfo): seq[byte] =
  ## Convert `x` with `toPreserve` and return the encoded bytes of that value.
  let pr = toPreserve(x)
  result = encode(pr)
|
||||
|
|
|
@ -219,20 +219,19 @@ type MSetItem* = object
|
|||
docid*: DocId
|
||||
weight*: float64
|
||||
rank*: DocCount
|
||||
percent*: int
|
||||
collapseKey*: string
|
||||
collapseCount*: DocCount
|
||||
# collapse_key*: string
|
||||
# collapse_count*: DocCount
|
||||
|
||||
proc initMSetItem(iter: MSetIterator; mset: MSet): MSetItem =
  ## Snapshot the iterator's current position into an `MSetItem`, keeping the
  ## originating `mset` and its first-item offset alongside the match data.
  MSetItem(
    mset: mset,
    firstitem: mset.get_firstitem(),
    docid: iter.get_docid(),
    weight: iter.get_weight(),
    rank: iter.get_rank(),
    # collapse_key: $iter.get_collapse_key(),
    # collapse_count: iter.get_collapse_count(),
  )
|
||||
|
||||
proc document*(item: MSetItem): Document =
  ## Fetch the document for `item` from the MSet it was taken from.
  # `rank` is absolute (the rank of mset[0] is `firstitem`), while MSet
  # indexing counts from the start of the MSet, so the offset is subtracted.
  # Indexing by `rank` alone is only correct when the MSet starts at 0.
  item.mset[item.rank - item.firstitem].get_document()
|
||||
|
@ -249,6 +248,9 @@ iterator items*(mset: MSet): MSetItem =
|
|||
|
||||
# QueryParser
|
||||
|
||||
# Binding for the default C++ constructor `Xapian::QueryParser()`.
proc initQueryParser*(): QueryParser {.
    importcpp: "Xapian::QueryParser()", constructor.}
|
||||
|
||||
# Binding for the C++ enum `Xapian::QueryParser::stem_strategy`.
# NOTE(review): members carry no explicit values, so their order presumably
# has to match the C++ declaration; confirm before reordering.
type StemStrategy* {.importcpp: "Xapian::QueryParser::stem_strategy".} = enum
  STEM_NONE,
  STEM_SOME,
  STEM_ALL,
  STEM_ALL_Z,
  STEM_SOME_FULL_POS
|
||||
|
||||
|
|
|
@ -4,6 +4,6 @@ bin = @["xapian_actor"]
|
|||
description = "Syndicate actor for accessing Xapian databases"
|
||||
license = "Unlicense"
|
||||
srcDir = "src"
|
||||
version = "20220421"
|
||||
version = "20230610"
|
||||
|
||||
requires "nim >= 1.6.4", "syndicate >= 20230609"
|
||||
|
|
Loading…
Reference in New Issue