/*
    This file is part of the KDE Project
    SPDX-FileCopyrightText: 2008-2010 Sebastian Trueg <trueg@kde.org>
    SPDX-FileCopyrightText: 2013-2014 Vishesh Handa <me@vhanda.in>
    SPDX-FileCopyrightText: 2020 Benjamin Port <benjamin.port@enioka.com>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
9 | |
10 | #include "fileindexerconfig.h" |
11 | #include "fileexcludefilters.h" |
12 | #include "storagedevices.h" |
13 | #include "baloodebug.h" |
14 | |
15 | #include <QStringList> |
16 | #include <QDir> |
17 | |
18 | #include <QStandardPaths> |
19 | #include "baloosettings.h" |
20 | |
21 | namespace |
22 | { |
23 | QString normalizeTrailingSlashes(QString&& path) |
24 | { |
25 | while (path.endsWith(c: QLatin1Char('/'))) { |
26 | path.chop(n: 1); |
27 | } |
28 | path += QLatin1Char('/'); |
29 | return path; |
30 | } |
31 | |
32 | } |
33 | |
34 | namespace Baloo |
35 | { |
36 | |
37 | FileIndexerConfig::FileIndexerConfig(QObject* parent) |
38 | : QObject(parent) |
39 | , m_settings(new BalooSettings(this)) |
40 | , m_folderCacheDirty(true) |
41 | , m_indexHidden(false) |
42 | , m_devices(nullptr) |
43 | , m_maxUncomittedFiles(40) |
44 | { |
45 | forceConfigUpdate(); |
46 | } |
47 | |
48 | FileIndexerConfig::~FileIndexerConfig() |
49 | { |
50 | } |
51 | |
52 | QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderConfig& entry) |
53 | { |
54 | QDebugStateSaver saver(dbg); |
55 | dbg.nospace() << entry.path << ": " |
56 | << (entry.isIncluded ? "included" : "excluded" ); |
57 | return dbg; |
58 | } |
59 | |
60 | QStringList FileIndexerConfig::includeFolders() const |
61 | { |
62 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
63 | |
64 | QStringList fl; |
65 | for (const auto& entry : m_folderCache) { |
66 | if (entry.isIncluded) { |
67 | fl << entry.path; |
68 | } |
69 | } |
70 | return fl; |
71 | } |
72 | |
73 | QStringList FileIndexerConfig::excludeFolders() const |
74 | { |
75 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
76 | |
77 | QStringList fl; |
78 | for (const auto& entry : m_folderCache) { |
79 | if (!entry.isIncluded) { |
80 | fl << entry.path; |
81 | } |
82 | } |
83 | return fl; |
84 | } |
85 | |
86 | QStringList FileIndexerConfig::excludeFilters() const |
87 | { |
88 | // read configured exclude filters |
89 | QStringList filters = m_settings->excludedFilters(); |
90 | |
91 | // make sure we always keep the latest default exclude filters |
92 | // TODO: there is one problem here. What if the user removed some of the default filters? |
93 | if (m_settings->excludedFiltersVersion() < defaultExcludeFilterListVersion()) { |
94 | filters += defaultExcludeFilterList(); |
95 | // in case the cfg entry was empty and filters == defaultExcludeFilterList() |
96 | filters.removeDuplicates(); |
97 | |
98 | // write the config directly since the KCM does not have support for the version yet |
99 | m_settings->setExcludedFilters(filters); |
100 | m_settings->setExcludedFiltersVersion(defaultExcludeFilterListVersion()); |
101 | } |
102 | |
103 | return filters; |
104 | } |
105 | |
106 | QStringList FileIndexerConfig::excludeMimetypes() const |
107 | { |
108 | return QList<QString>(m_excludeMimetypes.begin(), m_excludeMimetypes.end()); |
109 | } |
110 | |
111 | bool FileIndexerConfig::indexHiddenFilesAndFolders() const |
112 | { |
113 | return m_indexHidden; |
114 | } |
115 | |
116 | bool FileIndexerConfig::onlyBasicIndexing() const |
117 | { |
118 | return m_onlyBasicIndexing; |
119 | } |
120 | |
121 | bool FileIndexerConfig::canBeSearched(const QString& folder) const |
122 | { |
123 | QFileInfo fi(folder); |
124 | QString path = fi.absolutePath(); |
125 | if (!fi.isDir()) { |
126 | return false; |
127 | } else if (shouldFolderBeIndexed(path)) { |
128 | return true; |
129 | } |
130 | |
131 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
132 | |
133 | // Look for included descendants |
134 | for (const auto& entry : m_folderCache) { |
135 | if (entry.isIncluded && entry.path.startsWith(s: path)) { |
136 | return true; |
137 | } |
138 | } |
139 | |
140 | return false; |
141 | } |
142 | |
143 | bool FileIndexerConfig::shouldBeIndexed(const QString& path) const |
144 | { |
145 | QFileInfo fi(path); |
146 | if (fi.isDir()) { |
147 | return shouldFolderBeIndexed(path); |
148 | } else { |
149 | return (shouldFolderBeIndexed(path: fi.absolutePath()) && |
150 | (!fi.isHidden() || indexHiddenFilesAndFolders()) && |
151 | shouldFileBeIndexed(fileName: fi.fileName())); |
152 | } |
153 | } |
154 | |
155 | bool FileIndexerConfig::shouldFolderBeIndexed(const QString& path) const |
156 | { |
157 | QString folder; |
158 | auto normalizedPath = normalizeTrailingSlashes(path: QString(path)); |
159 | |
160 | if (folderInFolderList(path: normalizedPath, folder)) { |
161 | // we always index the folders in the list |
162 | // ignoring the name filters |
163 | if (folder == normalizedPath) { |
164 | return true; |
165 | } |
166 | |
167 | // check the exclude filters for all components of the path |
168 | // after folder |
169 | #ifndef __unix__ |
170 | QDir d(folder); |
171 | #endif |
172 | |
173 | const QStringView trailingPath = QStringView(normalizedPath).mid(pos: folder.size()); |
174 | const auto pathComponents = trailingPath.split(sep: QLatin1Char('/'), behavior: Qt::SkipEmptyParts); |
175 | for (const auto &c : pathComponents) { |
176 | if (!shouldFileBeIndexed(fileName: c.toString())) { |
177 | return false; |
178 | } |
179 | #ifndef __unix__ |
180 | if (!indexHiddenFilesAndFolders() || |
181 | !d.cd(c.toString()) || QFileInfo(d.path()).isHidden()) { |
182 | return false; |
183 | } |
184 | #endif |
185 | } |
186 | return true; |
187 | } |
188 | |
189 | return false; |
190 | } |
191 | |
192 | bool FileIndexerConfig::shouldFileBeIndexed(const QString& fileName) const |
193 | { |
194 | if (!indexHiddenFilesAndFolders() && fileName.startsWith(c: QLatin1Char('.'))) { |
195 | return false; |
196 | } |
197 | return !m_excludeFilterRegExpCache.exactMatch(s: fileName); |
198 | } |
199 | |
200 | bool FileIndexerConfig::shouldMimeTypeBeIndexed(const QString& mimeType) const |
201 | { |
202 | return !m_excludeMimetypes.contains(value: mimeType); |
203 | } |
204 | |
205 | bool FileIndexerConfig::folderInFolderList(const QString& path, QString& folder) const |
206 | { |
207 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
208 | |
209 | const QString p = normalizeTrailingSlashes(path: QString(path)); |
210 | |
211 | for (const auto& entry : m_folderCache) { |
212 | const QString& f = entry.path; |
213 | if (p.startsWith(s: f)) { |
214 | folder = f; |
215 | return entry.isIncluded; |
216 | } |
217 | } |
218 | // path is not in the list, thus it should not be included |
219 | folder.clear(); |
220 | return false; |
221 | } |
222 | |
223 | void FileIndexerConfig::FolderCache::cleanup() |
224 | { |
225 | // TODO There are two cases where "redundant" includes |
226 | // should be kept: |
227 | // 1. when the "tail" matches a path exclude filter |
228 | // (m_excludeFilterRegexpCache) |
229 | // 2. when the explicitly adds a hidden directory, and |
230 | // we want to index hidden dirs (m_indexHidden) |
231 | bool keepAllIncluded = true; |
232 | |
233 | auto entry = begin(); |
234 | while (entry != end()) { |
235 | if ((*entry).isIncluded && keepAllIncluded) { |
236 | ++entry; |
237 | continue; |
238 | } |
239 | |
240 | const QString entryPath = (*entry).path; |
241 | auto start = entry; ++start; |
242 | auto parent = std::find_if(first: start, last: end(), |
243 | pred: [&entryPath](const FolderConfig& _parent) { |
244 | return entryPath.startsWith(s: _parent.path); |
245 | }); |
246 | |
247 | if (parent != end()) { |
248 | if ((*entry).isIncluded == (*parent).isIncluded) { |
249 | // remove identical config |
250 | entry = erase(position: entry); |
251 | } else { |
252 | ++entry; |
253 | } |
254 | } else { |
255 | if (!(*entry).isIncluded) { |
256 | // remove excluded a topmost level (default) |
257 | entry = erase(position: entry); |
258 | } else { |
259 | ++entry; |
260 | } |
261 | } |
262 | } |
263 | } |
264 | |
265 | bool FileIndexerConfig::FolderConfig::operator<(const FolderConfig& other) const |
266 | { |
267 | return path.size() > other.path.size() || |
268 | (path.size() == other.path.size() && path < other.path); |
269 | } |
270 | |
271 | bool FileIndexerConfig::FolderCache::addFolderConfig(const FolderConfig& config) |
272 | { |
273 | if (config.path.isEmpty()) { |
274 | qCDebug(BALOO) << "Trying to add folder config entry with empty path" ; |
275 | return false; |
276 | } |
277 | auto newConfig{config}; |
278 | newConfig.path = QDir::cleanPath(path: config.path) + QLatin1Char('/'); |
279 | |
280 | auto it = std::lower_bound(first: cbegin(), last: cend(), val: newConfig); |
281 | if (it != cend() && (*it).path == newConfig.path) { |
282 | qCDebug(BALOO) << "Folder config entry for" << newConfig.path << "already exists" ; |
283 | return false; |
284 | } |
285 | |
286 | it = insert(position: it, x: newConfig); |
287 | return true; |
288 | } |
289 | |
290 | void FileIndexerConfig::buildFolderCache() |
291 | { |
292 | if (!m_folderCacheDirty) { |
293 | return; |
294 | } |
295 | |
296 | if (!m_devices) { |
297 | m_devices = new StorageDevices(this); |
298 | } |
299 | |
300 | FolderCache cache; |
301 | |
302 | const QStringList includeFolders = m_settings->folders(); |
303 | for (const auto& folder : includeFolders) { |
304 | if (!cache.addFolderConfig(config: {.path: folder, .isIncluded: true})) { |
305 | qCWarning(BALOO) << "Failed to add include folder config entry for" << folder; |
306 | } |
307 | } |
308 | |
309 | const QStringList excludeFolders = m_settings->excludedFolders(); |
310 | for (const auto& folder : excludeFolders) { |
311 | if (!cache.addFolderConfig(config: {.path: folder, .isIncluded: false})) { |
312 | qCWarning(BALOO) << "Failed to add exclude folder config entry for" << folder; |
313 | } |
314 | } |
315 | |
316 | // Add all removable media and network shares as ignored unless they have |
317 | // been explicitly added in the include list |
318 | const auto allMedia = m_devices->allMedia(); |
319 | for (const auto& device: allMedia) { |
320 | const QString mountPath = device.mountPath(); |
321 | if (!device.isUsable() && !mountPath.isEmpty()) { |
322 | if (!includeFolders.contains(str: mountPath)) { |
323 | cache.addFolderConfig(config: {.path: mountPath, .isIncluded: false}); |
324 | } |
325 | } |
326 | } |
327 | |
328 | cache.cleanup(); |
329 | qCDebug(BALOO) << "Folder cache:" << cache; |
330 | m_folderCache = cache; |
331 | |
332 | m_folderCacheDirty = false; |
333 | } |
334 | |
335 | void FileIndexerConfig::buildExcludeFilterRegExpCache() |
336 | { |
337 | QStringList newFilters = excludeFilters(); |
338 | m_excludeFilterRegExpCache.rebuildCacheFromFilterList(filters: newFilters); |
339 | } |
340 | |
341 | void FileIndexerConfig::buildMimeTypeCache() |
342 | { |
343 | const QStringList excludedTypes = m_settings->excludedMimetypes(); |
344 | m_excludeMimetypes = QSet<QString>(excludedTypes.begin(), excludedTypes.end()); |
345 | } |
346 | |
347 | void FileIndexerConfig::forceConfigUpdate() |
348 | { |
349 | m_settings->load(); |
350 | |
351 | m_folderCacheDirty = true; |
352 | buildExcludeFilterRegExpCache(); |
353 | buildMimeTypeCache(); |
354 | |
355 | m_indexHidden = m_settings->indexHiddenFolders(); |
356 | m_onlyBasicIndexing = m_settings->onlyBasicIndexing(); |
357 | } |
358 | |
359 | int FileIndexerConfig::databaseVersion() const |
360 | { |
361 | return m_settings->dbVersion(); |
362 | } |
363 | |
364 | void FileIndexerConfig::setDatabaseVersion(int version) |
365 | { |
366 | m_settings->setDbVersion(version); |
367 | m_settings->save(); |
368 | } |
369 | |
370 | bool FileIndexerConfig::indexingEnabled() const |
371 | { |
372 | return m_settings->indexingEnabled(); |
373 | } |
374 | |
375 | uint FileIndexerConfig::maxUncomittedFiles() const |
376 | { |
377 | return m_maxUncomittedFiles; |
378 | } |
379 | |
380 | } // namespace Baloo |
381 | |
382 | #include "moc_fileindexerconfig.cpp" |
383 | |