1 | /* |
2 | This file is part of the KDE Baloo project. |
3 | SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.1-or-later |
6 | */ |
7 | |
8 | #include "writetransaction.h" |
9 | #include "transaction.h" |
10 | |
11 | #include "postingdb.h" |
12 | #include "documentdb.h" |
13 | #include "documentiddb.h" |
14 | #include "positiondb.h" |
15 | #include "documenttimedb.h" |
16 | #include "documentdatadb.h" |
17 | #include "mtimedb.h" |
18 | #include "idutils.h" |
19 | |
20 | using namespace Baloo; |
21 | |
22 | void WriteTransaction::addDocument(const Document& doc) |
23 | { |
24 | quint64 id = doc.id(); |
25 | |
26 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
27 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
28 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
29 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
30 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
31 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
32 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
33 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
34 | |
35 | Q_ASSERT(!documentTermsDB.contains(id)); |
36 | Q_ASSERT(!documentXattrTermsDB.contains(id)); |
37 | Q_ASSERT(!documentFileNameTermsDB.contains(id)); |
38 | Q_ASSERT(!docTimeDB.contains(id)); |
39 | Q_ASSERT(!docDataDB.contains(id)); |
40 | Q_ASSERT(!contentIndexingDB.contains(id)); |
41 | Q_ASSERT(doc.parentId()); |
42 | |
43 | { |
44 | auto url = doc.url(); |
45 | int pos = url.lastIndexOf(c: '/'); |
46 | auto filename = url.mid(index: pos + 1); |
47 | auto parentId = doc.parentId(); |
48 | |
49 | if (pos > 0) { |
50 | docUrlDB.addPath(url: url.left(len: pos)); |
51 | } else { |
52 | parentId = 0; |
53 | } |
54 | |
55 | if (!docUrlDB.put(docId: id, parentId, fileName: filename)) { |
56 | return; |
57 | } |
58 | } |
59 | |
60 | QVector<QByteArray> docTerms = addTerms(id, terms: doc.m_terms); |
61 | Q_ASSERT(!docTerms.empty()); |
62 | documentTermsDB.put(docId: id, list: docTerms); |
63 | |
64 | QVector<QByteArray> docXattrTerms = addTerms(id, terms: doc.m_xattrTerms); |
65 | if (!docXattrTerms.isEmpty()) { |
66 | documentXattrTermsDB.put(docId: id, list: docXattrTerms); |
67 | } |
68 | |
69 | QVector<QByteArray> docFileNameTerms = addTerms(id, terms: doc.m_fileNameTerms); |
70 | if (!docFileNameTerms.isEmpty()) { |
71 | documentFileNameTermsDB.put(docId: id, list: docFileNameTerms); |
72 | } |
73 | |
74 | if (doc.contentIndexing()) { |
75 | contentIndexingDB.put(docId: doc.id()); |
76 | } |
77 | |
78 | DocumentTimeDB::TimeInfo info; |
79 | info.mTime = doc.m_mTime; |
80 | info.cTime = doc.m_cTime; |
81 | |
82 | docTimeDB.put(docId: id, info); |
83 | mtimeDB.put(mtime: doc.m_mTime, docId: id); |
84 | |
85 | if (!doc.m_data.isEmpty()) { |
86 | docDataDB.put(docId: id, data: doc.m_data); |
87 | } |
88 | } |
89 | |
90 | QVector<QByteArray> WriteTransaction::addTerms(quint64 id, const QMap<QByteArray, Document::TermData>& terms) |
91 | { |
92 | QVector<QByteArray> termList; |
93 | termList.reserve(asize: terms.size()); |
94 | m_pendingOperations.reserve(size: m_pendingOperations.size() + terms.size()); |
95 | |
96 | for (auto it = terms.cbegin(), end = terms.cend(); it != end; ++it) { |
97 | const QByteArray& term = it.key(); |
98 | termList.append(t: term); |
99 | |
100 | Operation op; |
101 | op.type = AddId; |
102 | op.data.docId = id; |
103 | op.data.positions = it.value().positions; |
104 | |
105 | m_pendingOperations[term].append(t: op); |
106 | } |
107 | |
108 | return termList; |
109 | } |
110 | |
111 | void WriteTransaction::removeDocument(quint64 id) |
112 | { |
113 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
114 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
115 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
116 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
117 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
118 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
119 | DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn); |
120 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
121 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
122 | |
123 | removeTerms(id, terms: documentTermsDB.get(docId: id)); |
124 | removeTerms(id, terms: documentFileNameTermsDB.get(docId: id)); |
125 | if (documentXattrTermsDB.contains(docId: id)) { |
126 | removeTerms(id, terms: documentXattrTermsDB.get(docId: id)); |
127 | } |
128 | |
129 | documentTermsDB.del(docId: id); |
130 | documentXattrTermsDB.del(docId: id); |
131 | documentFileNameTermsDB.del(docId: id); |
132 | |
133 | docUrlDB.del(docId: id); |
134 | |
135 | contentIndexingDB.del(docID: id); |
136 | failedIndexingDB.del(docID: id); |
137 | |
138 | DocumentTimeDB::TimeInfo info = docTimeDB.get(docId: id); |
139 | docTimeDB.del(docId: id); |
140 | mtimeDB.del(mtime: info.mTime, docId: id); |
141 | |
142 | docDataDB.del(docId: id); |
143 | } |
144 | |
145 | void WriteTransaction::removeTerms(quint64 id, const QVector<QByteArray>& terms) |
146 | { |
147 | for (const QByteArray& term : terms) { |
148 | Operation op; |
149 | op.type = RemoveId; |
150 | op.data.docId = id; |
151 | |
152 | m_pendingOperations[term].append(t: op); |
153 | } |
154 | } |
155 | |
156 | void WriteTransaction::removeRecursively(quint64 parentId) |
157 | { |
158 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
159 | |
160 | const QVector<quint64> children = docUrlDB.getChildren(docId: parentId); |
161 | for (quint64 id : children) { |
162 | if (id) { |
163 | removeRecursively(parentId: id); |
164 | } |
165 | } |
166 | removeDocument(id: parentId); |
167 | } |
168 | |
169 | bool WriteTransaction::removeRecursively(quint64 parentId, const std::function<bool(quint64)> &shouldDelete) |
170 | { |
171 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
172 | |
173 | if (parentId && !shouldDelete(parentId)) { |
174 | return false; |
175 | } |
176 | |
177 | bool isEmpty = true; |
178 | const QVector<quint64> children = docUrlDB.getChildren(docId: parentId); |
179 | for (quint64 id : children) { |
180 | isEmpty &= removeRecursively(parentId: id, shouldDelete); |
181 | } |
182 | // refetch |
183 | if (isEmpty && docUrlDB.getChildren(docId: parentId).isEmpty()) { |
184 | removeDocument(id: parentId); |
185 | return true; |
186 | } |
187 | return false; |
188 | } |
189 | |
190 | void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations) |
191 | { |
192 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
193 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
194 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
195 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
196 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
197 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
198 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
199 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
200 | |
201 | const quint64 id = doc.id(); |
202 | |
203 | if (operations & DocumentTerms) { |
204 | Q_ASSERT(!doc.m_terms.isEmpty()); |
205 | QVector<QByteArray> prevTerms = documentTermsDB.get(docId: id); |
206 | QVector<QByteArray> docTerms = replaceTerms(id, prevTerms, terms: doc.m_terms); |
207 | |
208 | if (docTerms != prevTerms) { |
209 | documentTermsDB.put(docId: id, list: docTerms); |
210 | } |
211 | } |
212 | |
213 | if (operations & XAttrTerms) { |
214 | QVector<QByteArray> prevTerms = documentXattrTermsDB.get(docId: id); |
215 | QVector<QByteArray> docXattrTerms = replaceTerms(id, prevTerms, terms: doc.m_xattrTerms); |
216 | |
217 | if (docXattrTerms != prevTerms) { |
218 | if (!docXattrTerms.isEmpty()) { |
219 | documentXattrTermsDB.put(docId: id, list: docXattrTerms); |
220 | } else { |
221 | documentXattrTermsDB.del(docId: id); |
222 | } |
223 | } |
224 | } |
225 | |
226 | if (operations & FileNameTerms) { |
227 | QVector<QByteArray> prevTerms = documentFileNameTermsDB.get(docId: id); |
228 | QVector<QByteArray> docFileNameTerms = replaceTerms(id, prevTerms, terms: doc.m_fileNameTerms); |
229 | |
230 | if (docFileNameTerms != prevTerms) { |
231 | if (!docFileNameTerms.isEmpty()) { |
232 | documentFileNameTermsDB.put(docId: id, list: docFileNameTerms); |
233 | } else { |
234 | documentFileNameTermsDB.del(docId: id); |
235 | } |
236 | } |
237 | } |
238 | |
239 | if (doc.contentIndexing()) { |
240 | contentIndexingDB.put(docId: doc.id()); |
241 | } |
242 | |
243 | if (operations & DocumentTime) { |
244 | DocumentTimeDB::TimeInfo info = docTimeDB.get(docId: id); |
245 | if (info.mTime != doc.m_mTime) { |
246 | mtimeDB.del(mtime: info.mTime, docId: id); |
247 | mtimeDB.put(mtime: doc.m_mTime, docId: id); |
248 | } |
249 | |
250 | info.mTime = doc.m_mTime; |
251 | info.cTime = doc.m_cTime; |
252 | docTimeDB.put(docId: id, info); |
253 | } |
254 | |
255 | if (operations & DocumentData) { |
256 | if (!doc.m_data.isEmpty()) { |
257 | docDataDB.put(docId: id, data: doc.m_data); |
258 | } else { |
259 | docDataDB.del(docId: id); |
260 | } |
261 | } |
262 | |
263 | if (operations & DocumentUrl) { |
264 | auto url = doc.url(); |
265 | int pos = url.lastIndexOf(c: '/'); |
266 | auto newname = url.mid(index: pos + 1); |
267 | docUrlDB.updateUrl(id: doc.id(), newParentId: doc.parentId(), newName: newname); |
268 | } |
269 | } |
270 | |
271 | QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector<QByteArray>& prevTerms, |
272 | const QMap<QByteArray, Document::TermData>& terms) |
273 | { |
274 | m_pendingOperations.reserve(size: m_pendingOperations.size() + prevTerms.size() + terms.size()); |
275 | for (const QByteArray& term : prevTerms) { |
276 | Operation op; |
277 | op.type = RemoveId; |
278 | op.data.docId = id; |
279 | |
280 | m_pendingOperations[term].append(t: op); |
281 | } |
282 | |
283 | return addTerms(id, terms); |
284 | } |
285 | |
286 | void WriteTransaction::commit() |
287 | { |
288 | PostingDB postingDB(m_dbis.postingDbi, m_txn); |
289 | PositionDB positionDB(m_dbis.positionDBi, m_txn); |
290 | |
291 | QHashIterator<QByteArray, QVector<Operation> > iter(m_pendingOperations); |
292 | while (iter.hasNext()) { |
293 | iter.next(); |
294 | |
295 | const QByteArray& term = iter.key(); |
296 | const QVector<Operation> operations = iter.value(); |
297 | |
298 | PostingList list = postingDB.get(term); |
299 | |
300 | bool fetchedPositionList = false; |
301 | QVector<PositionInfo> positionList; |
302 | |
303 | for (const Operation& op : operations) { |
304 | quint64 id = op.data.docId; |
305 | |
306 | if (op.type == AddId) { |
307 | sortedIdInsert(vec&: list, id); |
308 | |
309 | if (!op.data.positions.isEmpty()) { |
310 | if (!fetchedPositionList) { |
311 | positionList = positionDB.get(term); |
312 | fetchedPositionList = true; |
313 | } |
314 | sortedIdInsert(vec&: positionList, id: op.data); |
315 | } |
316 | } |
317 | else { |
318 | sortedIdRemove(vec&: list, id); |
319 | if (!fetchedPositionList) { |
320 | positionList = positionDB.get(term); |
321 | fetchedPositionList = true; |
322 | } |
323 | sortedIdRemove(vec&: positionList, id: PositionInfo(id)); |
324 | } |
325 | } |
326 | |
327 | if (!list.isEmpty()) { |
328 | postingDB.put(term, list); |
329 | } else { |
330 | postingDB.del(term); |
331 | } |
332 | |
333 | if (fetchedPositionList) { |
334 | if (!positionList.isEmpty()) { |
335 | positionDB.put(term, list: positionList); |
336 | } else { |
337 | positionDB.del(term); |
338 | } |
339 | } |
340 | } |
341 | |
342 | m_pendingOperations.clear(); |
343 | } |
344 | |