1 | /* |
2 | This file is part of the KDE Baloo project. |
3 | SPDX-FileCopyrightText: 2015 Vishesh Handa <me@vhanda.in> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.1-or-later |
6 | */ |
7 | |
8 | #include "enginedebug.h" |
9 | #include "postingdb.h" |
10 | #include "orpostingiterator.h" |
11 | #include "postingcodec.h" |
12 | |
13 | using namespace Baloo; |
14 | |
15 | PostingDB::PostingDB(MDB_dbi dbi, MDB_txn* txn) |
16 | : m_txn(txn) |
17 | , m_dbi(dbi) |
18 | { |
19 | Q_ASSERT(txn != nullptr); |
20 | Q_ASSERT(dbi != 0); |
21 | } |
22 | |
23 | PostingDB::~PostingDB() |
24 | { |
25 | } |
26 | |
27 | MDB_dbi PostingDB::create(MDB_txn* txn) |
28 | { |
29 | MDB_dbi dbi = 0; |
30 | int rc = mdb_dbi_open(txn, name: "postingdb" , MDB_CREATE, dbi: &dbi); |
31 | if (rc) { |
32 | qCWarning(ENGINE) << "PostingDB::create" << mdb_strerror(err: rc); |
33 | return 0; |
34 | } |
35 | |
36 | return dbi; |
37 | } |
38 | |
39 | MDB_dbi PostingDB::open(MDB_txn* txn) |
40 | { |
41 | MDB_dbi dbi = 0; |
42 | int rc = mdb_dbi_open(txn, name: "postingdb" , flags: 0, dbi: &dbi); |
43 | if (rc) { |
44 | qCWarning(ENGINE) << "PostingDB::open" << mdb_strerror(err: rc); |
45 | return 0; |
46 | } |
47 | |
48 | return dbi; |
49 | } |
50 | |
51 | void PostingDB::put(const QByteArray& term, const PostingList& list) |
52 | { |
53 | Q_ASSERT(!term.isEmpty()); |
54 | Q_ASSERT(!list.isEmpty()); |
55 | |
56 | MDB_val key; |
57 | key.mv_size = term.size(); |
58 | key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); |
59 | |
60 | QByteArray arr = PostingCodec::encode(list); |
61 | |
62 | MDB_val val; |
63 | val.mv_size = arr.size(); |
64 | val.mv_data = static_cast<void*>(arr.data()); |
65 | |
66 | int rc = mdb_put(txn: m_txn, dbi: m_dbi, key: &key, data: &val, flags: 0); |
67 | if (rc) { |
68 | qCWarning(ENGINE) << "PostingDB::put" << mdb_strerror(err: rc); |
69 | } |
70 | } |
71 | |
72 | PostingList PostingDB::get(const QByteArray& term) |
73 | { |
74 | Q_ASSERT(!term.isEmpty()); |
75 | |
76 | MDB_val key; |
77 | key.mv_size = term.size(); |
78 | key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); |
79 | |
80 | MDB_val val{.mv_size: 0, .mv_data: nullptr}; |
81 | int rc = mdb_get(txn: m_txn, dbi: m_dbi, key: &key, data: &val); |
82 | if (rc) { |
83 | if (rc != MDB_NOTFOUND) { |
84 | qCDebug(ENGINE) << "PostingDB::get" << term << mdb_strerror(err: rc); |
85 | } |
86 | return PostingList(); |
87 | } |
88 | |
89 | QByteArray arr = QByteArray::fromRawData(data: static_cast<char*>(val.mv_data), size: val.mv_size); |
90 | |
91 | return PostingCodec::decode(arr); |
92 | } |
93 | |
94 | void PostingDB::del(const QByteArray& term) |
95 | { |
96 | Q_ASSERT(!term.isEmpty()); |
97 | |
98 | MDB_val key; |
99 | key.mv_size = term.size(); |
100 | key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); |
101 | |
102 | int rc = mdb_del(txn: m_txn, dbi: m_dbi, key: &key, data: nullptr); |
103 | if (rc != 0 && rc != MDB_NOTFOUND) { |
104 | qCDebug(ENGINE) << "PostingDB::del" << term << mdb_strerror(err: rc); |
105 | } |
106 | } |
107 | |
108 | QVector< QByteArray > PostingDB::fetchTermsStartingWith(const QByteArray& term) |
109 | { |
110 | MDB_val key; |
111 | key.mv_size = term.size(); |
112 | key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); |
113 | |
114 | MDB_cursor* cursor; |
115 | int rc = mdb_cursor_open(txn: m_txn, dbi: m_dbi, cursor: &cursor); |
116 | if (rc) { |
117 | qCWarning(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(err: rc); |
118 | return {}; |
119 | } |
120 | |
121 | QVector<QByteArray> terms; |
122 | rc = mdb_cursor_get(cursor, key: &key, data: nullptr, op: MDB_SET_RANGE); |
123 | while (rc == 0) { |
124 | const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size); |
125 | if (!arr.startsWith(bv: term)) { |
126 | break; |
127 | } |
128 | terms << arr; |
129 | rc = mdb_cursor_get(cursor, key: &key, data: nullptr, op: MDB_NEXT); |
130 | } |
131 | if (rc != MDB_NOTFOUND) { |
132 | qCDebug(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(err: rc); |
133 | } |
134 | |
135 | mdb_cursor_close(cursor); |
136 | return terms; |
137 | } |
138 | |
139 | class DBPostingIterator : public PostingIterator { |
140 | public: |
141 | DBPostingIterator(void* data, uint size); |
142 | quint64 docId() const override; |
143 | quint64 next() override; |
144 | |
145 | private: |
146 | const QVector<quint64> m_vec; |
147 | int m_pos; |
148 | }; |
149 | |
150 | PostingIterator* PostingDB::iter(const QByteArray& term) |
151 | { |
152 | MDB_val key; |
153 | key.mv_size = term.size(); |
154 | key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); |
155 | |
156 | MDB_val val; |
157 | int rc = mdb_get(txn: m_txn, dbi: m_dbi, key: &key, data: &val); |
158 | if (rc) { |
159 | qCDebug(ENGINE) << "PostingDB::iter" << term << mdb_strerror(err: rc); |
160 | return nullptr; |
161 | } |
162 | |
163 | return new DBPostingIterator(val.mv_data, val.mv_size); |
164 | } |
165 | |
166 | // |
167 | // Posting Iterator |
168 | // |
169 | DBPostingIterator::DBPostingIterator(void* data, uint size) |
170 | : m_vec(PostingCodec().decode(arr: QByteArray(static_cast<char*>(data), size))) |
171 | , m_pos(-1) |
172 | { |
173 | } |
174 | |
175 | quint64 DBPostingIterator::docId() const |
176 | { |
177 | if (m_pos < 0 || m_pos >= m_vec.size()) { |
178 | return 0; |
179 | } |
180 | |
181 | return m_vec[m_pos]; |
182 | } |
183 | |
184 | quint64 DBPostingIterator::next() |
185 | { |
186 | if (m_pos >= m_vec.size() - 1) { |
187 | m_pos = m_vec.size(); |
188 | return 0; |
189 | } |
190 | |
191 | m_pos++; |
192 | return m_vec[m_pos]; |
193 | } |
194 | |
195 | template <typename Validator> |
196 | PostingIterator* PostingDB::iter(const QByteArray& prefix, Validator validate) |
197 | { |
198 | Q_ASSERT(!prefix.isEmpty()); |
199 | |
200 | MDB_val key; |
201 | key.mv_size = prefix.size(); |
202 | key.mv_data = static_cast<void*>(const_cast<char*>(prefix.constData())); |
203 | |
204 | MDB_cursor* cursor; |
205 | int rc = mdb_cursor_open(txn: m_txn, dbi: m_dbi, cursor: &cursor); |
206 | |
207 | if (rc) { |
208 | qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(err: rc); |
209 | return nullptr; |
210 | } |
211 | |
212 | QVector<PostingIterator*> termIterators; |
213 | |
214 | MDB_val val; |
215 | rc = mdb_cursor_get(cursor, key: &key, data: &val, op: MDB_SET_RANGE); |
216 | while (rc == 0) { |
217 | const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size); |
218 | if (!arr.startsWith(bv: prefix)) { |
219 | break; |
220 | } |
221 | if (validate(arr)) { |
222 | termIterators << new DBPostingIterator(val.mv_data, val.mv_size); |
223 | } |
224 | rc = mdb_cursor_get(cursor, key: &key, data: &val, op: MDB_NEXT); |
225 | } |
226 | |
227 | if (rc != 0 && rc != MDB_NOTFOUND) { |
228 | qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(err: rc); |
229 | } |
230 | |
231 | mdb_cursor_close(cursor); |
232 | if (termIterators.isEmpty()) { |
233 | return nullptr; |
234 | } |
235 | return new OrPostingIterator(termIterators); |
236 | } |
237 | |
238 | PostingIterator* PostingDB::prefixIter(const QByteArray& prefix) |
239 | { |
240 | auto validate = [] (const QByteArray& arr) { |
241 | Q_UNUSED(arr); |
242 | return true; |
243 | }; |
244 | return iter(prefix, validate); |
245 | } |
246 | |
247 | PostingIterator* PostingDB::regexpIter(const QRegularExpression& regexp, const QByteArray& prefix) |
248 | { |
249 | int prefixLen = prefix.length(); |
250 | auto validate = [®exp, prefixLen] (const QByteArray& arr) { |
251 | QString term = QString::fromUtf8(ba: arr.mid(index: prefixLen)); |
252 | return regexp.match(subject: term).hasMatch(); |
253 | }; |
254 | |
255 | return iter(prefix, validate); |
256 | } |
257 | |
258 | PostingIterator* PostingDB::compIter(const QByteArray& prefix, qlonglong comVal, PostingDB::Comparator com) |
259 | { |
260 | int prefixLen = prefix.length(); |
261 | auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { |
262 | bool ok = false; |
263 | auto val = QByteArray::fromRawData(data: arr.constData() + prefixLen, size: arr.length() - prefixLen).toLongLong(ok: &ok); |
264 | return ok && ((com == LessEqual && val <= comVal) || (com == GreaterEqual && val >= comVal)); |
265 | }; |
266 | return iter(prefix, validate); |
267 | } |
268 | |
269 | PostingIterator* PostingDB::compIter(const QByteArray& prefix, double comVal, PostingDB::Comparator com) |
270 | { |
271 | int prefixLen = prefix.length(); |
272 | auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { |
273 | bool ok = false; |
274 | auto val = QByteArray::fromRawData(data: arr.constData() + prefixLen, size: arr.length() - prefixLen).toDouble(ok: &ok); |
275 | return ok && ((com == LessEqual && val <= comVal) || |
276 | (com == GreaterEqual && val >= comVal)); |
277 | }; |
278 | return iter(prefix, validate); |
279 | } |
280 | |
281 | PostingIterator* PostingDB::compIter(const QByteArray& prefix, const QByteArray& comVal, PostingDB::Comparator com) |
282 | { |
283 | int prefixLen = prefix.length(); |
284 | auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { |
285 | auto val = QByteArray::fromRawData(data: arr.constData() + prefixLen, size: arr.length() - prefixLen); |
286 | return ((com == LessEqual && val <= comVal) || |
287 | (com == GreaterEqual && val >= comVal)); |
288 | }; |
289 | return iter(prefix, validate); |
290 | } |
291 | |
292 | QMap<QByteArray, PostingList> PostingDB::toTestMap() const |
293 | { |
294 | MDB_cursor* cursor; |
295 | mdb_cursor_open(txn: m_txn, dbi: m_dbi, cursor: &cursor); |
296 | |
297 | MDB_val key = {.mv_size: 0, .mv_data: nullptr}; |
298 | MDB_val val; |
299 | |
300 | QMap<QByteArray, PostingList> map; |
301 | while (1) { |
302 | int rc = mdb_cursor_get(cursor, key: &key, data: &val, op: MDB_NEXT); |
303 | if (rc == MDB_NOTFOUND) { |
304 | break; |
305 | } |
306 | if (rc) { |
307 | qCDebug(ENGINE) << "PostingDB::toTestMap" << mdb_strerror(err: rc); |
308 | break; |
309 | } |
310 | |
311 | const QByteArray ba(static_cast<char*>(key.mv_data), key.mv_size); |
312 | const PostingList plist = PostingCodec::decode(arr: QByteArray(static_cast<char*>(val.mv_data), val.mv_size)); |
313 | map.insert(key: ba, value: plist); |
314 | } |
315 | |
316 | mdb_cursor_close(cursor); |
317 | return map; |
318 | } |
319 | |