From ecd5ee5eb09c794cac8400ff976e74173e03f0d8 Mon Sep 17 00:00:00 2001
From: Emery Hemingway
Date: Mon, 19 Sep 2022 09:17:36 -0500
Subject: [PATCH] Initial commit

---
 src/xapian.nim | 330 +++++++++++++++++++++++++++++++++++++++++++++++++
 xapian.nimble  |  12 ++
 2 files changed, 342 insertions(+)
 create mode 100644 src/xapian.nim
 create mode 100644 xapian.nimble

diff --git a/src/xapian.nim b/src/xapian.nim
new file mode 100644
index 0000000..31e0b63
--- /dev/null
+++ b/src/xapian.nim
@@ -0,0 +1,330 @@
+# SPDX-FileCopyrightText: 2022 Emery Hemingway
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+## Thin Nim bindings for the Xapian C++ search library.
+##
+## Compiler and linker flags are taken from `pkg-config` at compile time.
+## Every routine is bound with `importcpp`, so the module has to be built
+## with the C++ backend (`nim cpp`).
+
+{.passC: staticExec("pkg-config --cflags xapian-core").}
+{.passL: staticExec("pkg-config --libs xapian-core").}
+
+{.pragma: xapianHeader, header: "xapian.h".}
+{.pragma: importXapian, xapianHeader, importcpp: "Xapian::$1".}
+{.pragma: importGetter, importcpp: "#.get_$1()".}
+
+# `std::string` carries byte strings across the C++ boundary. It stays
+# private: the exported API converts to and from Nim `string`.
+type CppString {.importcpp: "std::string", header: "<string>", byRef.} = object
+proc data(s: CppString): pointer {.importcpp.}
+proc length(s: CppString): csize_t {.importcpp.}
+
+# Builds a `std::string` from `len` bytes starting at `str`; passing the
+# length explicitly keeps embedded NUL bytes intact.
+proc basic_string(str: cstring; len: int): CppString {.
+  constructor, importcpp: "std::string(@)".}
+
+proc toCpp(s: string): CppString =
+  ## Copies a Nim string into a `std::string`.
+  # `cstring(s)` is a valid pointer even for an empty string, whereas
+  # `unsafeAddr s[0]` would raise an `IndexDefect`.
+  basic_string(cstring(s), s.len)
+
+proc `$`(cpp: CppString): string =
+  ## Copies the bytes of a `std::string` into a new Nim string.
+  let n = int(cpp.length)
+  if n > 0:
+    result = newString(n)
+    copyMem(addr result[0], cpp.data, n)
+
+# Values for the `flags` argument of `initDatabase` and
+# `initWritableDatabase`; they mirror the `Xapian::DB_*` constants.
+const
+  DB_CREATE_OR_OPEN* = 0x00
+  DB_CREATE_OR_OVERWRITE* = 0x01
+  DB_CREATE* = 0x02
+  DB_OPEN* = 0x03
+  DB_NO_SYNC* = 0x04
+  DB_FULL_SYNC* = 0x08
+  DB_DANGEROUS* = 0x10
+  DB_NO_TERMLIST* = 0x20
+  DB_RETRY_LOCK* = 0x40
+  DB_BACKEND_GLASS* = 0x100
+  DB_BACKEND_CHERT* = 0x200
+  DB_BACKEND_STUB* = 0x300
+  DB_BACKEND_INMEMORY* = 0x400
+  DB_BACKEND_HONEY* = 0x500
+
+# The integer widths below are selected with `-d:enable_64bit_docid`,
+# `-d:enable_64bit_termcount` and `-d:enable_64bit_termpos`.
+# NOTE(review): these presumably have to match the options the linked
+# Xapian library was configured with - confirm.
+when defined(enable_64bit_docid):
+  type
+    DocId* = distinct uint64
+    DocCount* = uint64
+else:
+  type
+    DocId* = distinct uint32
+    DocCount* = uint32
+
+when defined(enable_64bit_termcount):
+  type TermCount* = uint64
+else:
+  type TermCount* = uint32
+
+when defined(enable_64bit_termpos):
+  type TermPos* = uint64
+else:
+  type TermPos* = uint32
+
+proc `$`*(id: DocId): string {.borrow.}
+proc `==`*(a, b: DocId): bool {.borrow.}
+
+type
+  Rev* = int64
+  TotalLength* = int64
+
+  # Xapian's exception classes, imported with their inheritance relations.
+  Error* {.importXapian, inheritable.} = object
+  LogicError* {.importXapian.} = object of Error
+  RuntimeError* {.importXapian.} = object of Error
+  AssertionError* {.importXapian.} = object of LogicError
+  InvalidArgumentError* {.importXapian.} = object of LogicError
+  InvalidOperationError* {.importXapian.} = object of LogicError
+  UnimplementedError* {.importXapian.} = object of LogicError
+  DatabaseError* {.importXapian.} = object of RuntimeError
+  DatabaseCorruptError* {.importXapian.} = object of DatabaseError
+  DatabaseCreateError* {.importXapian.} = object of DatabaseError
+  DatabaseLockError* {.importXapian.} = object of DatabaseError
+  DatabaseModifiedError* {.importXapian.} = object of DatabaseError
+  DatabaseOpeningError* {.importXapian.} = object of DatabaseError
+  DatabaseVersionError* {.importXapian.} = object of DatabaseOpeningError
+  DocNotFoundError* {.importXapian.} = object of RuntimeError
+  FeatureUnavailableError* {.importXapian.} = object of RuntimeError
+  InternalError* {.importXapian.} = object of RuntimeError
+  NetworkError* {.importXapian.} = object of RuntimeError
+  NetworkTimeoutError* {.importXapian.} = object of NetworkError
+  QueryParserError* {.importXapian.} = object of RuntimeError
+  SerialisationError* {.importXapian.} = object of RuntimeError
+  RangeError* {.importXapian.} = object of RuntimeError
+  WildcardError* {.importXapian.} = object of RuntimeError
+  DatabaseNotFoundError* {.importXapian.} = object of DatabaseOpeningError
+  DatabaseClosedError* {.importXapian.} = object of DatabaseError
+
+  # Xapian classes; `byRef` makes Nim pass them by hidden reference.
+  Database* {.importXapian, byRef.} = object
+  WritableDatabase* {.importXapian, byRef.} = object
+  Document* {.importXapian, byRef.} = object
+  Enquire* {.importXapian, byRef.} = object
+  MSet* {.importXapian, byRef.} = object
+  MSetIterator {.importXapian, byRef.} = object
+  Query* {.importXapian, byRef.} = object
+  QueryParser* {.importXapian, byRef.} = object
+  Stem* {.importXapian, byRef.} = object
+  TermGenerator* {.importXapian, byRef.} = object
+
+# Convert numbers to and from the strings stored in document value slots.
+proc sortable_serialise(n: cdouble): CppString {.importcpp: "Xapian::$1(@)".}
+proc sortable_unserialise(s: CppString): cdouble {.importcpp: "Xapian::$1(@)".}
+
+# Database
+
+proc initDatabase*(path: cstring; flags = cint 0): Database {.
+  constructor, importcpp: "Xapian::Database(@)".}
+  ## Opens the database at `path`; `flags` takes `DB_*` values.
+
+proc reopen*(db: Database): bool {.importcpp.}
+proc close*(db: Database) {.importcpp.}
+proc has_positions*(db: Database): bool {.importcpp.}
+proc get_doccount*(db: Database): DocCount {.importcpp.}
+proc get_lastdocid*(db: Database): DocId {.importcpp.}
+proc get_average_length*(db: Database): float64 {.importcpp.}
+proc get_total_length*(db: Database): TotalLength {.importcpp.}
+proc get_document*(db: Database; id: DocId; flags = cint 0): Document {.importcpp.}
+proc get_uuid(db: Database): CppString {.importcpp.}
+proc uuid*(db: Database): string = $get_uuid(db)
+proc locked*(db: Database): bool {.importcpp.}
+proc lock*(db: Database; flags = cint 0): WritableDatabase {.importcpp.}
+proc unlock*(db: Database|WritableDatabase): Database {.importcpp.}
+
+proc initWritableDatabase*(path: cstring; flags = cint 0; blockSize = cint 0): WritableDatabase {.
+  constructor, importcpp: "Xapian::WritableDatabase(@)".}
+  ## Opens the database at `path` for writing; `flags` takes `DB_*` values.
+
+proc add_document*(db: WritableDatabase; doc: Document): DocId {.importcpp.}
+
+
+# Document
+
+proc get_data(doc: Document): CppString {.importcpp.}
+
+proc data*(doc: Document): string = $get_data(doc)
+
+proc set_data(doc: Document; data: cstring; len: int) {.
+  importcpp: "#.set_data(std::string(@))".}
+
+proc `data=`*(doc: Document; data: string) =
+  ## Replaces the document data with the bytes of `data`.
+  # `cstring(data)` stays valid for an empty string, unlike `data[0]`.
+  doc.set_data(cstring(data), data.len)
+
+proc get_value(doc: Document; slot: Natural): CppString {.importcpp.}
+proc add_value(doc: Document; slot: Natural; s: CppString) {.importcpp.}
+proc remove_value(doc: Document; slot: Natural) {.importcpp.}
+proc clear_values*(doc: Document) {.importcpp.}
+proc values_count*(doc: Document): cuint {.importcpp.}
+
+proc `[]`*(doc: Document; slot: Natural): string =
+  ## Returns the raw string stored in value slot `slot`.
+  $get_value(doc, slot)
+
+proc `[]=`*(doc: Document; slot: Natural; val: string) =
+  ## Stores `val` verbatim in value slot `slot`.
+  add_value(doc, slot, val.toCpp)
+
+proc `[]=`*(doc: Document; slot: Natural; val: SomeNumber) =
+  ## Stores `val` in value slot `slot`, encoded with `sortable_serialise`.
+  add_value(doc, slot, sortable_serialise(cdouble val))
+
+proc value*[T: SomeNumber](doc: Document; slot: Natural; default: T): T =
+  ## Decodes the number in value slot `slot`, or returns `default` when the
+  ## slot holds an empty string.
+  var val = doc.get_value(slot)
+  if val.length == 0:
+    result = default
+  else:
+    result = T sortable_unserialise(val)
+
+proc `del`*(doc: Document; slot: Natural) =
+  ## Removes any value stored in slot `slot`.
+  remove_value(doc, slot)
+
+# Enquire
+
+proc initEnquire*(db: Database): Enquire {.
+  constructor, importcpp: "Xapian::Enquire(@)".}
+
+proc `query`*(e: Enquire): Query {.
+  importcpp: "#.get_query(@)".}
+
+proc `query=`*(e: Enquire; q: Query) {.
+  importcpp: "#.set_query(@)".}
+
+proc `set_query`*(e: Enquire; q: Query; query_length = TermCount 0) {.
+  importcpp.}
+
+proc get_mset*(e: Enquire;
+               first, maxItems: DocCount;
+               checkAtLeast = DocCount 0): MSet {.
+  importcpp.}
+
+# MSet
+proc size*(m: MSet): DocCount {.importcpp.}
+proc get_matches_estimated*(m: MSet): DocCount {.importcpp.}
+proc get_matches_upper_bound*(m: MSet): DocCount {.importcpp.}
+proc get_matches_lower_bound*(m: MSet): DocCount {.importcpp.}
+proc get_uncollapsed_matches_lower_bound*(m: MSet): DocCount {.importcpp.}
+proc get_uncollapsed_matches_estimated*(m: MSet): DocCount {.importcpp.}
+proc get_uncollapsed_matches_upper_bound*(m: MSet): DocCount {.importcpp.}
+# The two maxima are weights, which Xapian reports as `double`.
+proc get_max_attained*(m: MSet): float64 {.importcpp.}
+proc get_max_possible*(m: MSet): float64 {.importcpp.}
+proc get_firstitem*(m: MSet): DocCount {.importcpp.}
+
+proc begin(m: MSet): MSetIterator {.importcpp.}
+proc `end`(m: MSet): MSetIterator {.importcpp.}
+proc `[]`*(m: MSet; i: int|DocCount): MSetIterator {.importcpp: "#[#]".}
+
+proc next(iter: MSetIterator) {.importcpp: "++#".}
+
+proc `==`(a, b: MSetIterator): bool {.importcpp: "# == #".}
+
+# Dereferencing the C++ iterator (`operator*`) yields the document ID.
+proc get_docid*(iter: MSetIterator): DocId {.importcpp: "*#".}
+proc get_rank(iter: MSetIterator): DocCount {.importcpp.}
+proc get_document*(iter: MSetIterator): Document {.importcpp.}
+proc get_weight*(iter: MSetIterator): float {.importcpp.}
+proc get_collapse_key*(iter: MSetIterator): CppString {.importcpp.}
+proc get_collapse_count*(iter: MSetIterator): DocCount {.importcpp.}
+proc get_sort_key*(iter: MSetIterator): CppString {.importcpp.}
+proc get_percent*(iter: MSetIterator): int {.importcpp.}
+
+# Snapshot of one match, filled in while iterating over an `MSet`.
+type MSetItem* = object
+  mset: MSet
+  firstItem: DocCount
+  docid*: DocId
+  weight*: float64
+  rank*: DocCount
+  percent*: int
+  collapseKey*: string
+  collapseCount*: DocCount
+
+proc initMSetItem(iter: MSetIterator; mset: MSet): MSetItem =
+  ## Copies the properties of the match at `iter` into an `MSetItem`.
+  result = MSetItem(
+    mset: mset,
+    firstItem: mset.get_firstitem(),
+    docid: iter.get_docid(),
+    weight: iter.get_weight(),
+    rank: iter.get_rank(),
+    percent: iter.get_percent(),
+    collapseKey: $iter.get_collapse_key(),
+    collapseCount: iter.get_collapse_count())
+
+proc document*(item: MSetItem): Document =
+  ## Fetches the document of this match from the originating `MSet`.
+  # `rank` counts from the start of the full result list, but `MSet`'s
+  # `operator[]` is indexed from the first item the MSet holds, so the
+  # offset reported by `get_firstitem` has to be subtracted.
+  item.mset[item.rank - item.firstItem].get_document()
+
+iterator items*(mset: MSet): MSetItem =
+  ## Yields an `MSetItem` for every match from `begin` up to `end`.
+  var iter = mset.begin()
+  let `end` = mset.`end`()
+  while iter != `end`:
+    yield initMSetItem(iter, mset)
+    next(iter)
+
+# Query
+
+# QueryParser
+
+type StemStrategy* {.importcpp: "Xapian::QueryParser::stem_strategy".} = enum
+  STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z, STEM_SOME_FULL_POS
+
+proc set_stemmer*(qp: QueryParser; s: Stem) {.importcpp.}
+proc set_stemming_strategy*(qp: QueryParser; s: StemStrategy) {.importcpp.}
+proc set_database*(qp: QueryParser; db: Database) {.importcpp.}
+
+proc parse_query*(qp: QueryParser; query: cstring): Query {.importcpp.}
+
+# Stem
+
+proc initStem*(language: cstring; fallback = false): Stem {.
+  constructor, importcpp: "Xapian::Stem(@)".}
+
+# TermGenerator
+
+proc document*(tg: TermGenerator): Document {.
+  importcpp: "#.get_document()".}
+  ## Returns the generator's current document.
+
+proc `document=`*(tg: TermGenerator; data: Document) {.
+  importcpp: "#.set_document(@)".}
+
+proc index_text*(tg: TermGenerator;
+                 text: cstring; wdf_inc = TermCount 1; prefix = cstring "") {.
+  importcpp.}
+
+proc set_stemmer*(tg: TermGenerator; stemmer: Stem) {.importcpp.}
+
+# Types that expose `get_description()` on the C++ side.
+type Describeable = Enquire | Database | Document | MSet | MSetIterator |
+  Query | QueryParser | Stem | TermGenerator | WritableDatabase
+proc get_description(x: Describeable): CppString {.importcpp.}
+proc `$`*(x: Describeable): string = $get_description(x)
diff --git a/xapian.nimble b/xapian.nimble
new file mode 100644
index 0000000..285d2c7
--- /dev/null
+++ b/xapian.nimble
@@ -0,0 +1,12 @@
+# Package
+
+version = "20220421"
+author = "Emery Hemingway"
+description = "Xapian library wrapper"
+license = "GPL-2.0-or-later"
+srcDir = "src"
+
+
+# Dependencies
+
+requires "nim >= 1.6.4"