| 1 | /* |
| 2 | This file is part of the KDE Baloo project. |
| 3 | SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.1-or-later |
| 6 | */ |
| 7 | |
| 8 | #include "writetransaction.h" |
| 9 | #include "transaction.h" |
| 10 | |
| 11 | #include "postingdb.h" |
| 12 | #include "documentdb.h" |
| 13 | #include "documentiddb.h" |
| 14 | #include "positiondb.h" |
| 15 | #include "documenttimedb.h" |
| 16 | #include "documentdatadb.h" |
| 17 | #include "mtimedb.h" |
| 18 | #include "idutils.h" |
| 19 | |
| 20 | using namespace Baloo; |
| 21 | |
| 22 | void WriteTransaction::addDocument(const Document& doc) |
| 23 | { |
| 24 | quint64 id = doc.id(); |
| 25 | |
| 26 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
| 27 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
| 28 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
| 29 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
| 30 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
| 31 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
| 32 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
| 33 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
| 34 | |
| 35 | Q_ASSERT(!documentTermsDB.contains(id)); |
| 36 | Q_ASSERT(!documentXattrTermsDB.contains(id)); |
| 37 | Q_ASSERT(!documentFileNameTermsDB.contains(id)); |
| 38 | Q_ASSERT(!docTimeDB.contains(id)); |
| 39 | Q_ASSERT(!docDataDB.contains(id)); |
| 40 | Q_ASSERT(!contentIndexingDB.contains(id)); |
| 41 | Q_ASSERT(doc.parentId()); |
| 42 | |
| 43 | { |
| 44 | auto url = doc.url(); |
| 45 | int pos = url.lastIndexOf(ch: '/'); |
| 46 | auto filename = url.mid(index: pos + 1); |
| 47 | auto parentId = doc.parentId(); |
| 48 | |
| 49 | if (pos > 0) { |
| 50 | docUrlDB.addPath(url: url.left(n: pos)); |
| 51 | } else { |
| 52 | parentId = 0; |
| 53 | } |
| 54 | |
| 55 | if (!docUrlDB.put(docId: id, parentId, fileName: filename)) { |
| 56 | return; |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | QVector<QByteArray> docTerms = addTerms(id, terms: doc.m_terms); |
| 61 | Q_ASSERT(!docTerms.empty()); |
| 62 | documentTermsDB.put(docId: id, list: docTerms); |
| 63 | |
| 64 | QVector<QByteArray> docXattrTerms = addTerms(id, terms: doc.m_xattrTerms); |
| 65 | if (!docXattrTerms.isEmpty()) { |
| 66 | documentXattrTermsDB.put(docId: id, list: docXattrTerms); |
| 67 | } |
| 68 | |
| 69 | QVector<QByteArray> docFileNameTerms = addTerms(id, terms: doc.m_fileNameTerms); |
| 70 | if (!docFileNameTerms.isEmpty()) { |
| 71 | documentFileNameTermsDB.put(docId: id, list: docFileNameTerms); |
| 72 | } |
| 73 | |
| 74 | if (doc.contentIndexing()) { |
| 75 | contentIndexingDB.put(docId: doc.id()); |
| 76 | } |
| 77 | |
| 78 | DocumentTimeDB::TimeInfo info; |
| 79 | info.mTime = doc.m_mTime; |
| 80 | info.cTime = doc.m_cTime; |
| 81 | |
| 82 | docTimeDB.put(docId: id, info); |
| 83 | mtimeDB.put(mtime: doc.m_mTime, docId: id); |
| 84 | |
| 85 | if (!doc.m_data.isEmpty()) { |
| 86 | docDataDB.put(docId: id, data: doc.m_data); |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | QVector<QByteArray> WriteTransaction::addTerms(quint64 id, const QMap<QByteArray, Document::TermData>& terms) |
| 91 | { |
| 92 | QVector<QByteArray> termList; |
| 93 | termList.reserve(asize: terms.size()); |
| 94 | m_pendingOperations.reserve(size: m_pendingOperations.size() + terms.size()); |
| 95 | |
| 96 | for (auto it = terms.cbegin(), end = terms.cend(); it != end; ++it) { |
| 97 | const QByteArray& term = it.key(); |
| 98 | termList.append(t: term); |
| 99 | |
| 100 | Operation op; |
| 101 | op.type = AddId; |
| 102 | op.data.docId = id; |
| 103 | op.data.positions = it.value().positions; |
| 104 | |
| 105 | m_pendingOperations[term].append(t: op); |
| 106 | } |
| 107 | |
| 108 | return termList; |
| 109 | } |
| 110 | |
| 111 | void WriteTransaction::removeDocument(quint64 id) |
| 112 | { |
| 113 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
| 114 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
| 115 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
| 116 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
| 117 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
| 118 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
| 119 | DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn); |
| 120 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
| 121 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
| 122 | |
| 123 | removeTerms(id, terms: documentTermsDB.get(docId: id)); |
| 124 | removeTerms(id, terms: documentFileNameTermsDB.get(docId: id)); |
| 125 | if (documentXattrTermsDB.contains(docId: id)) { |
| 126 | removeTerms(id, terms: documentXattrTermsDB.get(docId: id)); |
| 127 | } |
| 128 | |
| 129 | documentTermsDB.del(docId: id); |
| 130 | documentXattrTermsDB.del(docId: id); |
| 131 | documentFileNameTermsDB.del(docId: id); |
| 132 | |
| 133 | docUrlDB.del(docId: id); |
| 134 | |
| 135 | contentIndexingDB.del(docID: id); |
| 136 | failedIndexingDB.del(docID: id); |
| 137 | |
| 138 | DocumentTimeDB::TimeInfo info = docTimeDB.get(docId: id); |
| 139 | docTimeDB.del(docId: id); |
| 140 | mtimeDB.del(mtime: info.mTime, docId: id); |
| 141 | |
| 142 | docDataDB.del(docId: id); |
| 143 | } |
| 144 | |
| 145 | void WriteTransaction::removeTerms(quint64 id, const QVector<QByteArray>& terms) |
| 146 | { |
| 147 | for (const QByteArray& term : terms) { |
| 148 | Operation op; |
| 149 | op.type = RemoveId; |
| 150 | op.data.docId = id; |
| 151 | |
| 152 | m_pendingOperations[term].append(t: op); |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | void WriteTransaction::removeRecursively(quint64 parentId) |
| 157 | { |
| 158 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
| 159 | |
| 160 | const QVector<quint64> children = docUrlDB.getChildren(docId: parentId); |
| 161 | for (quint64 id : children) { |
| 162 | if (id) { |
| 163 | removeRecursively(parentId: id); |
| 164 | } |
| 165 | } |
| 166 | removeDocument(id: parentId); |
| 167 | } |
| 168 | |
| 169 | bool WriteTransaction::removeRecursively(quint64 parentId, const std::function<bool(quint64)> &shouldDelete) |
| 170 | { |
| 171 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
| 172 | |
| 173 | if (parentId && !shouldDelete(parentId)) { |
| 174 | return false; |
| 175 | } |
| 176 | |
| 177 | bool isEmpty = true; |
| 178 | const QVector<quint64> children = docUrlDB.getChildren(docId: parentId); |
| 179 | for (quint64 id : children) { |
| 180 | isEmpty &= removeRecursively(parentId: id, shouldDelete); |
| 181 | } |
| 182 | // refetch |
| 183 | if (isEmpty && docUrlDB.getChildren(docId: parentId).isEmpty()) { |
| 184 | removeDocument(id: parentId); |
| 185 | return true; |
| 186 | } |
| 187 | return false; |
| 188 | } |
| 189 | |
| 190 | void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations) |
| 191 | { |
| 192 | DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); |
| 193 | DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); |
| 194 | DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); |
| 195 | DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); |
| 196 | DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); |
| 197 | DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); |
| 198 | MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); |
| 199 | DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); |
| 200 | |
| 201 | const quint64 id = doc.id(); |
| 202 | |
| 203 | if (operations & DocumentTerms) { |
| 204 | Q_ASSERT(!doc.m_terms.isEmpty()); |
| 205 | QVector<QByteArray> prevTerms = documentTermsDB.get(docId: id); |
| 206 | QVector<QByteArray> docTerms = replaceTerms(id, prevTerms, terms: doc.m_terms); |
| 207 | |
| 208 | if (docTerms != prevTerms) { |
| 209 | documentTermsDB.put(docId: id, list: docTerms); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | if (operations & XAttrTerms) { |
| 214 | QVector<QByteArray> prevTerms = documentXattrTermsDB.get(docId: id); |
| 215 | QVector<QByteArray> docXattrTerms = replaceTerms(id, prevTerms, terms: doc.m_xattrTerms); |
| 216 | |
| 217 | if (docXattrTerms != prevTerms) { |
| 218 | if (!docXattrTerms.isEmpty()) { |
| 219 | documentXattrTermsDB.put(docId: id, list: docXattrTerms); |
| 220 | } else { |
| 221 | documentXattrTermsDB.del(docId: id); |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | if (operations & FileNameTerms) { |
| 227 | QVector<QByteArray> prevTerms = documentFileNameTermsDB.get(docId: id); |
| 228 | QVector<QByteArray> docFileNameTerms = replaceTerms(id, prevTerms, terms: doc.m_fileNameTerms); |
| 229 | |
| 230 | if (docFileNameTerms != prevTerms) { |
| 231 | if (!docFileNameTerms.isEmpty()) { |
| 232 | documentFileNameTermsDB.put(docId: id, list: docFileNameTerms); |
| 233 | } else { |
| 234 | documentFileNameTermsDB.del(docId: id); |
| 235 | } |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | if (doc.contentIndexing()) { |
| 240 | contentIndexingDB.put(docId: doc.id()); |
| 241 | } |
| 242 | |
| 243 | if (operations & DocumentTime) { |
| 244 | DocumentTimeDB::TimeInfo info = docTimeDB.get(docId: id); |
| 245 | if (info.mTime != doc.m_mTime) { |
| 246 | mtimeDB.del(mtime: info.mTime, docId: id); |
| 247 | mtimeDB.put(mtime: doc.m_mTime, docId: id); |
| 248 | } |
| 249 | |
| 250 | info.mTime = doc.m_mTime; |
| 251 | info.cTime = doc.m_cTime; |
| 252 | docTimeDB.put(docId: id, info); |
| 253 | } |
| 254 | |
| 255 | if (operations & DocumentData) { |
| 256 | if (!doc.m_data.isEmpty()) { |
| 257 | docDataDB.put(docId: id, data: doc.m_data); |
| 258 | } else { |
| 259 | docDataDB.del(docId: id); |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | if (operations & DocumentUrl) { |
| 264 | auto url = doc.url(); |
| 265 | int pos = url.lastIndexOf(ch: '/'); |
| 266 | auto newname = url.mid(index: pos + 1); |
| 267 | docUrlDB.updateUrl(id: doc.id(), newParentId: doc.parentId(), newName: newname); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector<QByteArray>& prevTerms, |
| 272 | const QMap<QByteArray, Document::TermData>& terms) |
| 273 | { |
| 274 | m_pendingOperations.reserve(size: m_pendingOperations.size() + prevTerms.size() + terms.size()); |
| 275 | for (const QByteArray& term : prevTerms) { |
| 276 | Operation op; |
| 277 | op.type = RemoveId; |
| 278 | op.data.docId = id; |
| 279 | |
| 280 | m_pendingOperations[term].append(t: op); |
| 281 | } |
| 282 | |
| 283 | return addTerms(id, terms); |
| 284 | } |
| 285 | |
| 286 | void WriteTransaction::commit() |
| 287 | { |
| 288 | PostingDB postingDB(m_dbis.postingDbi, m_txn); |
| 289 | PositionDB positionDB(m_dbis.positionDBi, m_txn); |
| 290 | |
| 291 | QHashIterator<QByteArray, QVector<Operation> > iter(m_pendingOperations); |
| 292 | while (iter.hasNext()) { |
| 293 | iter.next(); |
| 294 | |
| 295 | const QByteArray& term = iter.key(); |
| 296 | const QVector<Operation> operations = iter.value(); |
| 297 | |
| 298 | PostingList list = postingDB.get(term); |
| 299 | |
| 300 | bool fetchedPositionList = false; |
| 301 | QVector<PositionInfo> positionList; |
| 302 | |
| 303 | for (const Operation& op : operations) { |
| 304 | quint64 id = op.data.docId; |
| 305 | |
| 306 | if (op.type == AddId) { |
| 307 | sortedIdInsert(vec&: list, id); |
| 308 | |
| 309 | if (!op.data.positions.isEmpty()) { |
| 310 | if (!fetchedPositionList) { |
| 311 | positionList = positionDB.get(term); |
| 312 | fetchedPositionList = true; |
| 313 | } |
| 314 | sortedIdInsert(vec&: positionList, id: op.data); |
| 315 | } |
| 316 | } |
| 317 | else { |
| 318 | sortedIdRemove(vec&: list, id); |
| 319 | if (!fetchedPositionList) { |
| 320 | positionList = positionDB.get(term); |
| 321 | fetchedPositionList = true; |
| 322 | } |
| 323 | sortedIdRemove(vec&: positionList, id: PositionInfo(id)); |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | if (!list.isEmpty()) { |
| 328 | postingDB.put(term, list); |
| 329 | } else { |
| 330 | postingDB.del(term); |
| 331 | } |
| 332 | |
| 333 | if (fetchedPositionList) { |
| 334 | if (!positionList.isEmpty()) { |
| 335 | positionDB.put(term, list: positionList); |
| 336 | } else { |
| 337 | positionDB.del(term); |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | m_pendingOperations.clear(); |
| 343 | } |
| 344 | |