/*
    This file is part of the KDE Baloo Project
    SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org>

    SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
*/
7
8#include "baloodebug.h"
9#include "searchstore.h"
10#include "global.h"
11
12#include "database.h"
13#include "term.h"
14#include "transaction.h"
15#include "enginequery.h"
16#include "termgenerator.h"
17#include "andpostingiterator.h"
18#include "orpostingiterator.h"
19
20#include <QDateTime>
21
22#include <KFileMetaData/PropertyInfo>
23#include <KFileMetaData/TypeInfo>
24#include <KFileMetaData/Types>
25
26#include <algorithm>
27#include <array>
28#include <tuple>
29
30namespace Baloo {
31
32namespace {
33QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com)
34{
35 Q_ASSERT(dt.isValid());
36
37 if (com == Term::Equal) {
38 // Timestamps in DB are quint32 relative to Epoch (1970...2106)
39 auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch());
40 auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch());
41 return {start, end};
42 }
43
44 quint32 timet = dt.toSecsSinceEpoch();
45 if (com == Term::LessEqual) {
46 return {0, timet};
47 }
48 if (com == Term::Less) {
49 return {0, timet - 1};
50 }
51 if (com == Term::GreaterEqual) {
52 return {timet, std::numeric_limits<quint32>::max()};
53 }
54 if (com == Term::Greater) {
55 return {timet + 1, std::numeric_limits<quint32>::max()};
56 }
57
58 Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator");
59 return {0, 0};
60}
61
62struct InternalProperty {
63 const char* propertyName;
64 const char* prefix;
65 QMetaType::Type valueType;
66};
67constexpr std::array<InternalProperty, 7> internalProperties{._M_elems: {{.propertyName: "content", .prefix: "", .valueType: QMetaType::QString},
68 {.propertyName: "filename", .prefix: "F", .valueType: QMetaType::QString},
69 {.propertyName: "mimetype", .prefix: "M", .valueType: QMetaType::QString},
70 {.propertyName: "rating", .prefix: "R", .valueType: QMetaType::Int},
71 {.propertyName: "tag", .prefix: "TAG-", .valueType: QMetaType::QString},
72 {.propertyName: "tags", .prefix: "TA", .valueType: QMetaType::QString},
73 {.propertyName: "usercomment", .prefix: "C", .valueType: QMetaType::QString}}};
74
75std::pair<QByteArray, QMetaType::Type> propertyInfo(const QByteArray &property)
76{
77 auto it = std::find_if(first: std::begin(cont: internalProperties), last: std::end(cont: internalProperties),
78 pred: [&property] (const InternalProperty& entry) { return property == entry.propertyName; });
79 if (it != std::end(cont: internalProperties)) {
80 return { (*it).prefix, (*it).valueType };
81 } else {
82 KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(name: QString::fromUtf8(ba: property));
83 if (pi.property() == KFileMetaData::Property::Empty) {
84 return {QByteArray(), QMetaType::UnknownType};
85 }
86 int propPrefix = static_cast<int>(pi.property());
87 return {QByteArray('X' + QByteArray::number(propPrefix) + '-'), pi.valueType()};
88 }
89}
90
91EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value)
92{
93 // We use the TermGenerator to normalize the words in the value and to
94 // split it into other words. If we split the words, we then add them as a
95 // phrase query.
96 const QByteArrayList terms = TermGenerator::termList(text: value);
97
98 QVector<EngineQuery> queries;
99 queries.reserve(asize: terms.size());
100 for (const QByteArray& term : terms) {
101 QByteArray arr = prefix + term;
102 // FIXME - compatibility hack, to find truncated terms with old
103 // DBs, remove on next DB bump
104 if (arr.size() > 25) {
105 queries << EngineQuery(arr.left(len: 25), EngineQuery::StartsWith);
106 } else {
107 queries << EngineQuery(arr);
108 }
109 }
110
111 if (queries.isEmpty()) {
112 return EngineQuery();
113 } else if (queries.size() == 1) {
114 return queries.first();
115 } else {
116 return EngineQuery(queries);
117 }
118}
119
120EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value)
121{
122 auto query = constructEqualsQuery(prefix, value);
123 if (query.op() == EngineQuery::Equal) {
124 if (query.term().size() >= 3) {
125 query.setOp(EngineQuery::StartsWith);
126 }
127 }
128 return query;
129}
130
131EngineQuery constructTypeQuery(const QString& value)
132{
133 Q_ASSERT(!value.isEmpty());
134
135 KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(name: value);
136 if (ti == KFileMetaData::Type::Empty) {
137 qCDebug(BALOO) << "Type" << value << "does not exist";
138 return EngineQuery();
139 }
140 int num = static_cast<int>(ti.type());
141
142 return EngineQuery('T' + QByteArray::number(num));
143}
144} // namespace
145
146SearchStore::SearchStore()
147 : m_db(nullptr)
148{
149 m_db = globalDatabaseInstance();
150 if (!m_db->open(mode: Database::ReadOnlyDatabase)) {
151 m_db = nullptr;
152 }
153}
154
155SearchStore::~SearchStore()
156{
157}
158
159// Return the result with-in [offset, offset + limit)
160ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults)
161{
162 if (!m_db || !m_db->isOpen()) {
163 return ResultList();
164 }
165
166 Transaction tr(m_db, Transaction::ReadOnly);
167 std::unique_ptr<PostingIterator> it(constructQuery(tr: &tr, term));
168 if (!it) {
169 return ResultList();
170 }
171
172 if (sortResults) {
173 QVector<std::pair<quint64, quint32>> resultIds;
174 while (it->next()) {
175 quint64 id = it->docId();
176 quint32 mtime = tr.documentTimeInfo(id).mTime;
177 resultIds << std::pair<quint64, quint32>{id, mtime};
178
179 Q_ASSERT(id > 0);
180 }
181
182 // Not enough results within range, no need to sort.
183 if (offset >= static_cast<uint>(resultIds.size())) {
184 return ResultList();
185 }
186
187 auto compFunc = [](const std::pair<quint64, quint32>& lhs,
188 const std::pair<quint64, quint32>& rhs) {
189 return lhs.second > rhs.second;
190 };
191
192 std::sort(first: resultIds.begin(), last: resultIds.end(), comp: compFunc);
193 if (limit < 0) {
194 limit = resultIds.size();
195 }
196
197 ResultList results;
198 const uint end = qMin(a: static_cast<uint>(resultIds.size()), b: offset + static_cast<uint>(limit));
199 results.reserve(n: end - offset);
200 for (uint i = offset; i < end; i++) {
201 const quint64 id = resultIds[i].first;
202 Result res{tr.documentUrl(id), id};
203
204 results.emplace_back(args&: res);
205 }
206
207 return results;
208 }
209 else {
210 ResultList results;
211 uint ulimit = limit < 0 ? UINT_MAX : limit;
212
213 while (offset && it->next()) {
214 offset--;
215 }
216
217 while (ulimit && it->next()) {
218 const quint64 id = it->docId();
219 Q_ASSERT(id > 0);
220 Result res{tr.documentUrl(id), id};
221 Q_ASSERT(!res.filePath.isEmpty());
222
223 results.emplace_back(args&: res);
224
225 ulimit--;
226 }
227
228 return results;
229 }
230}
231
232PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term)
233{
234 Q_ASSERT(tr);
235
236 if (term.operation() == Term::And || term.operation() == Term::Or) {
237 const QList<Term> subTerms = term.subTerms();
238 QVector<PostingIterator*> vec;
239 vec.reserve(asize: subTerms.size());
240
241 for (const Term& t : subTerms) {
242 auto iterator = constructQuery(tr, term: t);
243 // constructQuery returns a nullptr to signal an empty list
244 if (iterator) {
245 vec << iterator;
246 } else if (term.operation() == Term::And) {
247 return nullptr;
248 }
249 }
250
251 if (vec.isEmpty()) {
252 return nullptr;
253 } else if (vec.size() == 1) {
254 return vec.takeFirst();
255 }
256
257 if (term.operation() == Term::And) {
258 return new AndPostingIterator(vec);
259 } else {
260 return new OrPostingIterator(vec);
261 }
262 }
263
264 if (term.value().isNull()) {
265 return nullptr;
266 }
267 Q_ASSERT(term.value().isValid());
268 Q_ASSERT(term.comparator() != Term::Auto);
269 Q_ASSERT(term.comparator() == Term::Contains ? term.value().typeId() == QMetaType::QString : true);
270
271 const QVariant value = term.value();
272 const QByteArray property = term.property().toLower().toUtf8();
273
274 if (property == "type" || property == "kind") {
275 EngineQuery q = constructTypeQuery(value: value.toString());
276 return tr->postingIterator(query: q);
277 }
278 else if (property == "includefolder") {
279 const QByteArray folder = value.toString().toUtf8();
280
281 if (folder.isEmpty()) {
282 return nullptr;
283 }
284 if (!folder.startsWith(c: '/')) {
285 return nullptr;
286 }
287
288 quint64 id = tr->documentId(path: folder);
289 if (!id) {
290 qCDebug(BALOO) << "Folder" << value.toString() << "not indexed";
291 return nullptr;
292 }
293
294 return tr->docUrlIter(id);
295 }
296 else if (property == "modified" || property == "mtime") {
297 if (value.typeId() == QMetaType::QByteArray) {
298 // Used by Baloo::Query
299 QByteArray ba = value.toByteArray();
300 Q_ASSERT(ba.size() >= 4);
301
302 int year = ba.mid(index: 0, len: 4).toInt();
303 int month = ba.mid(index: 4, len: 2).toInt();
304 int day = ba.mid(index: 6, len: 2).toInt();
305
306 Q_ASSERT(year);
307
308 // uses 0 to represent whole month or whole year
309 month = month >= 0 && month <= 12 ? month : 0;
310 day = day >= 0 && day <= 31 ? day : 0;
311
312 QDate startDate(year, month ? month : 1, day ? day : 1);
313 QDate endDate(startDate);
314
315 if (month == 0) {
316 endDate.setDate(year: endDate.year(), month: 12, day: 31);
317 } else if (day == 0) {
318 endDate.setDate(year: endDate.year(), month: endDate.month(), day: endDate.daysInMonth());
319 }
320
321 return tr->mTimeRangeIter(beginTime: startDate.startOfDay().toSecsSinceEpoch(), endTime: endDate.endOfDay().toSecsSinceEpoch());
322 } else if (value.typeId() == QMetaType::QString) {
323 const QDateTime dt = value.toDateTime();
324 QPair<quint32, quint32> timerange = calculateTimeRange(dt, com: term.comparator());
325 if ((timerange.first == 0) && (timerange.second == 0)) {
326 return nullptr;
327 }
328 return tr->mTimeRangeIter(beginTime: timerange.first, endTime: timerange.second);
329 } else {
330 Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values");
331 return nullptr;
332 }
333 } else if (property == "tag") {
334 if (term.comparator() == Term::Equal) {
335 const QByteArray prefix = "TAG-";
336 EngineQuery q = EngineQuery(prefix + value.toByteArray());
337 return tr->postingIterator(query: q);
338 } else if (term.comparator() == Term::Contains) {
339 const QByteArray prefix = "TA";
340 EngineQuery q = constructEqualsQuery(prefix, value: value.toString());
341 return tr->postingIterator(query: q);
342 } else {
343 Q_ASSERT(0);
344 return nullptr;
345 }
346 } else if (property == "") {
347 Term cterm(QStringLiteral("content"), term.value(), term.comparator());
348 Term fterm(QStringLiteral("filename"), term.value(), term.comparator());
349 return constructQuery(tr, term: Term{cterm, Term::Operation::Or, fterm});
350 }
351
352 QByteArray prefix;
353 QMetaType::Type valueType = QMetaType::QString;
354 if (!property.isEmpty()) {
355 std::tie(args&: prefix, args&: valueType) = propertyInfo(property);
356 if (valueType == QMetaType::UnknownType) {
357 return nullptr;
358 }
359 }
360
361 auto com = term.comparator();
362 if (com == Term::Contains && valueType == QMetaType::Int) {
363 com = Term::Equal;
364 }
365 if (com == Term::Contains) {
366 EngineQuery q = constructContainsQuery(prefix, value: value.toString());
367 return tr->postingIterator(query: q);
368 }
369
370 if (com == Term::Equal) {
371 EngineQuery q = constructEqualsQuery(prefix, value: value.toString());
372 return tr->postingIterator(query: q);
373 }
374
375 PostingDB::Comparator pcom;
376 if (com == Term::Greater || com == Term::GreaterEqual) {
377 pcom = PostingDB::GreaterEqual;
378 } else if (com == Term::Less || com == Term::LessEqual) {
379 pcom = PostingDB::LessEqual;
380 }
381
382 // FIXME -- has to be kept in sync with the code from
383 // Baloo::Result::add
384 if (valueType == QMetaType::Int) {
385 qlonglong intVal = value.toLongLong();
386
387 if (term.comparator() == Term::Greater) {
388 intVal++;
389 } else if (term.comparator() == Term::Less) {
390 intVal--;
391 }
392
393 return tr->postingCompIterator(prefix, value: intVal, com: pcom);
394
395 } else if (valueType == QMetaType::Double) {
396 double dVal = value.toDouble();
397 return tr->postingCompIterator(prefix, value: dVal, com: pcom);
398
399 } else if (valueType == QMetaType::QDateTime) {
400 QDateTime dt = value.toDateTime();
401 const QByteArray ba = dt.toString(format: Qt::ISODate).toUtf8();
402 return tr->postingCompIterator(prefix, value: ba, com: pcom);
403
404 } else {
405 qCDebug(BALOO) << "Comparison must be with an integer";
406 }
407
408 return nullptr;
409}
410
411} // namespace Baloo
412

// source code of baloo/src/lib/searchstore.cpp