1 | /* |
2 | This file is part of the KDE Baloo Project |
3 | SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL |
6 | */ |
7 | |
8 | #include "baloodebug.h" |
9 | #include "searchstore.h" |
10 | #include "global.h" |
11 | |
12 | #include "database.h" |
13 | #include "term.h" |
14 | #include "transaction.h" |
15 | #include "enginequery.h" |
16 | #include "termgenerator.h" |
17 | #include "andpostingiterator.h" |
18 | #include "orpostingiterator.h" |
19 | |
20 | #include <QDateTime> |
21 | |
22 | #include <KFileMetaData/PropertyInfo> |
23 | #include <KFileMetaData/TypeInfo> |
24 | #include <KFileMetaData/Types> |
25 | |
26 | #include <algorithm> |
27 | #include <array> |
28 | #include <tuple> |
29 | |
30 | namespace Baloo { |
31 | |
32 | namespace { |
33 | QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com) |
34 | { |
35 | Q_ASSERT(dt.isValid()); |
36 | |
37 | if (com == Term::Equal) { |
38 | // Timestamps in DB are quint32 relative to Epoch (1970...2106) |
39 | auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch()); |
40 | auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch()); |
41 | return {start, end}; |
42 | } |
43 | |
44 | quint32 timet = dt.toSecsSinceEpoch(); |
45 | if (com == Term::LessEqual) { |
46 | return {0, timet}; |
47 | } |
48 | if (com == Term::Less) { |
49 | return {0, timet - 1}; |
50 | } |
51 | if (com == Term::GreaterEqual) { |
52 | return {timet, std::numeric_limits<quint32>::max()}; |
53 | } |
54 | if (com == Term::Greater) { |
55 | return {timet + 1, std::numeric_limits<quint32>::max()}; |
56 | } |
57 | |
58 | Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator" ); |
59 | return {0, 0}; |
60 | } |
61 | |
62 | struct InternalProperty { |
63 | const char* propertyName; |
64 | const char* prefix; |
65 | QMetaType::Type valueType; |
66 | }; |
67 | constexpr std::array<InternalProperty, 7> internalProperties{._M_elems: {{.propertyName: "content" , .prefix: "" , .valueType: QMetaType::QString}, |
68 | {.propertyName: "filename" , .prefix: "F" , .valueType: QMetaType::QString}, |
69 | {.propertyName: "mimetype" , .prefix: "M" , .valueType: QMetaType::QString}, |
70 | {.propertyName: "rating" , .prefix: "R" , .valueType: QMetaType::Int}, |
71 | {.propertyName: "tag" , .prefix: "TAG-" , .valueType: QMetaType::QString}, |
72 | {.propertyName: "tags" , .prefix: "TA" , .valueType: QMetaType::QString}, |
73 | {.propertyName: "usercomment" , .prefix: "C" , .valueType: QMetaType::QString}}}; |
74 | |
75 | std::pair<QByteArray, QMetaType::Type> propertyInfo(const QByteArray &property) |
76 | { |
77 | auto it = std::find_if(first: std::begin(cont: internalProperties), last: std::end(cont: internalProperties), |
78 | pred: [&property] (const InternalProperty& entry) { return property == entry.propertyName; }); |
79 | if (it != std::end(cont: internalProperties)) { |
80 | return { (*it).prefix, (*it).valueType }; |
81 | } else { |
82 | KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(name: QString::fromUtf8(ba: property)); |
83 | if (pi.property() == KFileMetaData::Property::Empty) { |
84 | return {QByteArray(), QMetaType::UnknownType}; |
85 | } |
86 | int propPrefix = static_cast<int>(pi.property()); |
87 | return {QByteArray('X' + QByteArray::number(propPrefix) + '-'), pi.valueType()}; |
88 | } |
89 | } |
90 | |
91 | EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value) |
92 | { |
93 | // We use the TermGenerator to normalize the words in the value and to |
94 | // split it into other words. If we split the words, we then add them as a |
95 | // phrase query. |
96 | const QByteArrayList terms = TermGenerator::termList(text: value); |
97 | |
98 | QVector<EngineQuery> queries; |
99 | queries.reserve(asize: terms.size()); |
100 | for (const QByteArray& term : terms) { |
101 | QByteArray arr = prefix + term; |
102 | // FIXME - compatibility hack, to find truncated terms with old |
103 | // DBs, remove on next DB bump |
104 | if (arr.size() > 25) { |
105 | queries << EngineQuery(arr.left(len: 25), EngineQuery::StartsWith); |
106 | } else { |
107 | queries << EngineQuery(arr); |
108 | } |
109 | } |
110 | |
111 | if (queries.isEmpty()) { |
112 | return EngineQuery(); |
113 | } else if (queries.size() == 1) { |
114 | return queries.first(); |
115 | } else { |
116 | return EngineQuery(queries); |
117 | } |
118 | } |
119 | |
120 | EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value) |
121 | { |
122 | auto query = constructEqualsQuery(prefix, value); |
123 | if (query.op() == EngineQuery::Equal) { |
124 | if (query.term().size() >= 3) { |
125 | query.setOp(EngineQuery::StartsWith); |
126 | } |
127 | } |
128 | return query; |
129 | } |
130 | |
131 | EngineQuery constructTypeQuery(const QString& value) |
132 | { |
133 | Q_ASSERT(!value.isEmpty()); |
134 | |
135 | KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(name: value); |
136 | if (ti == KFileMetaData::Type::Empty) { |
137 | qCDebug(BALOO) << "Type" << value << "does not exist" ; |
138 | return EngineQuery(); |
139 | } |
140 | int num = static_cast<int>(ti.type()); |
141 | |
142 | return EngineQuery('T' + QByteArray::number(num)); |
143 | } |
144 | } // namespace |
145 | |
146 | SearchStore::SearchStore() |
147 | : m_db(nullptr) |
148 | { |
149 | m_db = globalDatabaseInstance(); |
150 | if (!m_db->open(mode: Database::ReadOnlyDatabase)) { |
151 | m_db = nullptr; |
152 | } |
153 | } |
154 | |
155 | SearchStore::~SearchStore() |
156 | { |
157 | } |
158 | |
159 | // Return the result with-in [offset, offset + limit) |
160 | ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults) |
161 | { |
162 | if (!m_db || !m_db->isOpen()) { |
163 | return ResultList(); |
164 | } |
165 | |
166 | Transaction tr(m_db, Transaction::ReadOnly); |
167 | std::unique_ptr<PostingIterator> it(constructQuery(tr: &tr, term)); |
168 | if (!it) { |
169 | return ResultList(); |
170 | } |
171 | |
172 | if (sortResults) { |
173 | QVector<std::pair<quint64, quint32>> resultIds; |
174 | while (it->next()) { |
175 | quint64 id = it->docId(); |
176 | quint32 mtime = tr.documentTimeInfo(id).mTime; |
177 | resultIds << std::pair<quint64, quint32>{id, mtime}; |
178 | |
179 | Q_ASSERT(id > 0); |
180 | } |
181 | |
182 | // Not enough results within range, no need to sort. |
183 | if (offset >= static_cast<uint>(resultIds.size())) { |
184 | return ResultList(); |
185 | } |
186 | |
187 | auto compFunc = [](const std::pair<quint64, quint32>& lhs, |
188 | const std::pair<quint64, quint32>& rhs) { |
189 | return lhs.second > rhs.second; |
190 | }; |
191 | |
192 | std::sort(first: resultIds.begin(), last: resultIds.end(), comp: compFunc); |
193 | if (limit < 0) { |
194 | limit = resultIds.size(); |
195 | } |
196 | |
197 | ResultList results; |
198 | const uint end = qMin(a: static_cast<uint>(resultIds.size()), b: offset + static_cast<uint>(limit)); |
199 | results.reserve(n: end - offset); |
200 | for (uint i = offset; i < end; i++) { |
201 | const quint64 id = resultIds[i].first; |
202 | Result res{tr.documentUrl(id), id}; |
203 | |
204 | results.emplace_back(args&: res); |
205 | } |
206 | |
207 | return results; |
208 | } |
209 | else { |
210 | ResultList results; |
211 | uint ulimit = limit < 0 ? UINT_MAX : limit; |
212 | |
213 | while (offset && it->next()) { |
214 | offset--; |
215 | } |
216 | |
217 | while (ulimit && it->next()) { |
218 | const quint64 id = it->docId(); |
219 | Q_ASSERT(id > 0); |
220 | Result res{tr.documentUrl(id), id}; |
221 | Q_ASSERT(!res.filePath.isEmpty()); |
222 | |
223 | results.emplace_back(args&: res); |
224 | |
225 | ulimit--; |
226 | } |
227 | |
228 | return results; |
229 | } |
230 | } |
231 | |
232 | PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term) |
233 | { |
234 | Q_ASSERT(tr); |
235 | |
236 | if (term.operation() == Term::And || term.operation() == Term::Or) { |
237 | const QList<Term> subTerms = term.subTerms(); |
238 | QVector<PostingIterator*> vec; |
239 | vec.reserve(asize: subTerms.size()); |
240 | |
241 | for (const Term& t : subTerms) { |
242 | auto iterator = constructQuery(tr, term: t); |
243 | // constructQuery returns a nullptr to signal an empty list |
244 | if (iterator) { |
245 | vec << iterator; |
246 | } else if (term.operation() == Term::And) { |
247 | return nullptr; |
248 | } |
249 | } |
250 | |
251 | if (vec.isEmpty()) { |
252 | return nullptr; |
253 | } else if (vec.size() == 1) { |
254 | return vec.takeFirst(); |
255 | } |
256 | |
257 | if (term.operation() == Term::And) { |
258 | return new AndPostingIterator(vec); |
259 | } else { |
260 | return new OrPostingIterator(vec); |
261 | } |
262 | } |
263 | |
264 | if (term.value().isNull()) { |
265 | return nullptr; |
266 | } |
267 | Q_ASSERT(term.value().isValid()); |
268 | Q_ASSERT(term.comparator() != Term::Auto); |
269 | Q_ASSERT(term.comparator() == Term::Contains ? term.value().typeId() == QMetaType::QString : true); |
270 | |
271 | const QVariant value = term.value(); |
272 | const QByteArray property = term.property().toLower().toUtf8(); |
273 | |
274 | if (property == "type" || property == "kind" ) { |
275 | EngineQuery q = constructTypeQuery(value: value.toString()); |
276 | return tr->postingIterator(query: q); |
277 | } |
278 | else if (property == "includefolder" ) { |
279 | const QByteArray folder = value.toString().toUtf8(); |
280 | |
281 | if (folder.isEmpty()) { |
282 | return nullptr; |
283 | } |
284 | if (!folder.startsWith(c: '/')) { |
285 | return nullptr; |
286 | } |
287 | |
288 | quint64 id = tr->documentId(path: folder); |
289 | if (!id) { |
290 | qCDebug(BALOO) << "Folder" << value.toString() << "not indexed" ; |
291 | return nullptr; |
292 | } |
293 | |
294 | return tr->docUrlIter(id); |
295 | } |
296 | else if (property == "modified" || property == "mtime" ) { |
297 | if (value.typeId() == QMetaType::QByteArray) { |
298 | // Used by Baloo::Query |
299 | QByteArray ba = value.toByteArray(); |
300 | Q_ASSERT(ba.size() >= 4); |
301 | |
302 | int year = ba.mid(index: 0, len: 4).toInt(); |
303 | int month = ba.mid(index: 4, len: 2).toInt(); |
304 | int day = ba.mid(index: 6, len: 2).toInt(); |
305 | |
306 | Q_ASSERT(year); |
307 | |
308 | // uses 0 to represent whole month or whole year |
309 | month = month >= 0 && month <= 12 ? month : 0; |
310 | day = day >= 0 && day <= 31 ? day : 0; |
311 | |
312 | QDate startDate(year, month ? month : 1, day ? day : 1); |
313 | QDate endDate(startDate); |
314 | |
315 | if (month == 0) { |
316 | endDate.setDate(year: endDate.year(), month: 12, day: 31); |
317 | } else if (day == 0) { |
318 | endDate.setDate(year: endDate.year(), month: endDate.month(), day: endDate.daysInMonth()); |
319 | } |
320 | |
321 | return tr->mTimeRangeIter(beginTime: startDate.startOfDay().toSecsSinceEpoch(), endTime: endDate.endOfDay().toSecsSinceEpoch()); |
322 | } else if (value.typeId() == QMetaType::QString) { |
323 | const QDateTime dt = value.toDateTime(); |
324 | QPair<quint32, quint32> timerange = calculateTimeRange(dt, com: term.comparator()); |
325 | if ((timerange.first == 0) && (timerange.second == 0)) { |
326 | return nullptr; |
327 | } |
328 | return tr->mTimeRangeIter(beginTime: timerange.first, endTime: timerange.second); |
329 | } else { |
330 | Q_ASSERT_X(0, "SearchStore::constructQuery" , "modified property must contain date/datetime values" ); |
331 | return nullptr; |
332 | } |
333 | } else if (property == "tag" ) { |
334 | if (term.comparator() == Term::Equal) { |
335 | const QByteArray prefix = "TAG-" ; |
336 | EngineQuery q = EngineQuery(prefix + value.toByteArray()); |
337 | return tr->postingIterator(query: q); |
338 | } else if (term.comparator() == Term::Contains) { |
339 | const QByteArray prefix = "TA" ; |
340 | EngineQuery q = constructEqualsQuery(prefix, value: value.toString()); |
341 | return tr->postingIterator(query: q); |
342 | } else { |
343 | Q_ASSERT(0); |
344 | return nullptr; |
345 | } |
346 | } else if (property == "" ) { |
347 | Term cterm(QStringLiteral("content" ), term.value(), term.comparator()); |
348 | Term fterm(QStringLiteral("filename" ), term.value(), term.comparator()); |
349 | return constructQuery(tr, term: Term{cterm, Term::Operation::Or, fterm}); |
350 | } |
351 | |
352 | QByteArray prefix; |
353 | QMetaType::Type valueType = QMetaType::QString; |
354 | if (!property.isEmpty()) { |
355 | std::tie(args&: prefix, args&: valueType) = propertyInfo(property); |
356 | if (valueType == QMetaType::UnknownType) { |
357 | return nullptr; |
358 | } |
359 | } |
360 | |
361 | auto com = term.comparator(); |
362 | if (com == Term::Contains && valueType == QMetaType::Int) { |
363 | com = Term::Equal; |
364 | } |
365 | if (com == Term::Contains) { |
366 | EngineQuery q = constructContainsQuery(prefix, value: value.toString()); |
367 | return tr->postingIterator(query: q); |
368 | } |
369 | |
370 | if (com == Term::Equal) { |
371 | EngineQuery q = constructEqualsQuery(prefix, value: value.toString()); |
372 | return tr->postingIterator(query: q); |
373 | } |
374 | |
375 | PostingDB::Comparator pcom; |
376 | if (com == Term::Greater || com == Term::GreaterEqual) { |
377 | pcom = PostingDB::GreaterEqual; |
378 | } else if (com == Term::Less || com == Term::LessEqual) { |
379 | pcom = PostingDB::LessEqual; |
380 | } |
381 | |
382 | // FIXME -- has to be kept in sync with the code from |
383 | // Baloo::Result::add |
384 | if (valueType == QMetaType::Int) { |
385 | qlonglong intVal = value.toLongLong(); |
386 | |
387 | if (term.comparator() == Term::Greater) { |
388 | intVal++; |
389 | } else if (term.comparator() == Term::Less) { |
390 | intVal--; |
391 | } |
392 | |
393 | return tr->postingCompIterator(prefix, value: intVal, com: pcom); |
394 | |
395 | } else if (valueType == QMetaType::Double) { |
396 | double dVal = value.toDouble(); |
397 | return tr->postingCompIterator(prefix, value: dVal, com: pcom); |
398 | |
399 | } else if (valueType == QMetaType::QDateTime) { |
400 | QDateTime dt = value.toDateTime(); |
401 | const QByteArray ba = dt.toString(format: Qt::ISODate).toUtf8(); |
402 | return tr->postingCompIterator(prefix, value: ba, com: pcom); |
403 | |
404 | } else { |
405 | qCDebug(BALOO) << "Comparison must be with an integer" ; |
406 | } |
407 | |
408 | return nullptr; |
409 | } |
410 | |
411 | } // namespace Baloo |
412 | |