| 1 | /* |
| 2 | This file is part of the KDE Project |
| 3 | SPDX-FileCopyrightText: 2008-2010 Sebastian Trueg <trueg@kde.org> |
| 4 | SPDX-FileCopyrightText: 2013-2014 Vishesh Handa <me@vhanda.in> |
| 5 | SPDX-FileCopyrightText: 2020 Benjamin Port <benjamin.port@enioka.com> |
| 6 | |
| 7 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 8 | */ |
| 9 | |
| 10 | #include "fileindexerconfig.h" |
| 11 | #include "fileexcludefilters.h" |
| 12 | #include "storagedevices.h" |
| 13 | #include "baloodebug.h" |
| 14 | |
| 15 | #include <QStringList> |
| 16 | #include <QDir> |
| 17 | |
| 18 | #include <QStandardPaths> |
| 19 | #include "baloosettings.h" |
| 20 | |
| 21 | namespace |
| 22 | { |
| 23 | QString normalizeTrailingSlashes(QString&& path) |
| 24 | { |
| 25 | while (path.endsWith(c: QLatin1Char('/'))) { |
| 26 | path.chop(n: 1); |
| 27 | } |
| 28 | path += QLatin1Char('/'); |
| 29 | return path; |
| 30 | } |
| 31 | |
| 32 | } |
| 33 | |
| 34 | namespace Baloo |
| 35 | { |
| 36 | |
| 37 | FileIndexerConfig::FileIndexerConfig(QObject* parent) |
| 38 | : QObject(parent) |
| 39 | , m_settings(new BalooSettings(this)) |
| 40 | , m_folderCacheDirty(true) |
| 41 | , m_indexHidden(false) |
| 42 | , m_devices(nullptr) |
| 43 | , m_maxUncomittedFiles(40) |
| 44 | { |
| 45 | forceConfigUpdate(); |
| 46 | } |
| 47 | |
| 48 | FileIndexerConfig::~FileIndexerConfig() |
| 49 | { |
| 50 | } |
| 51 | |
| 52 | QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderConfig& entry) |
| 53 | { |
| 54 | QDebugStateSaver saver(dbg); |
| 55 | dbg.nospace() << entry.path << ": " |
| 56 | << (entry.isIncluded ? "included" : "excluded" ); |
| 57 | return dbg; |
| 58 | } |
| 59 | |
| 60 | QStringList FileIndexerConfig::includeFolders() const |
| 61 | { |
| 62 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
| 63 | |
| 64 | QStringList fl; |
| 65 | for (const auto& entry : m_folderCache) { |
| 66 | if (entry.isIncluded) { |
| 67 | fl << entry.path; |
| 68 | } |
| 69 | } |
| 70 | return fl; |
| 71 | } |
| 72 | |
| 73 | QStringList FileIndexerConfig::excludeFolders() const |
| 74 | { |
| 75 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
| 76 | |
| 77 | QStringList fl; |
| 78 | for (const auto& entry : m_folderCache) { |
| 79 | if (!entry.isIncluded) { |
| 80 | fl << entry.path; |
| 81 | } |
| 82 | } |
| 83 | return fl; |
| 84 | } |
| 85 | |
| 86 | QStringList FileIndexerConfig::excludeFilters() const |
| 87 | { |
| 88 | // read configured exclude filters |
| 89 | QStringList filters = m_settings->excludedFilters(); |
| 90 | |
| 91 | // make sure we always keep the latest default exclude filters |
| 92 | // TODO: there is one problem here. What if the user removed some of the default filters? |
| 93 | if (m_settings->excludedFiltersVersion() < defaultExcludeFilterListVersion()) { |
| 94 | filters += defaultExcludeFilterList(); |
| 95 | // in case the cfg entry was empty and filters == defaultExcludeFilterList() |
| 96 | filters.removeDuplicates(); |
| 97 | |
| 98 | // write the config directly since the KCM does not have support for the version yet |
| 99 | m_settings->setExcludedFilters(filters); |
| 100 | m_settings->setExcludedFiltersVersion(defaultExcludeFilterListVersion()); |
| 101 | } |
| 102 | |
| 103 | return filters; |
| 104 | } |
| 105 | |
| 106 | QStringList FileIndexerConfig::excludeMimetypes() const |
| 107 | { |
| 108 | return QList<QString>(m_excludeMimetypes.begin(), m_excludeMimetypes.end()); |
| 109 | } |
| 110 | |
| 111 | bool FileIndexerConfig::indexHiddenFilesAndFolders() const |
| 112 | { |
| 113 | return m_indexHidden; |
| 114 | } |
| 115 | |
| 116 | bool FileIndexerConfig::onlyBasicIndexing() const |
| 117 | { |
| 118 | return m_onlyBasicIndexing; |
| 119 | } |
| 120 | |
| 121 | bool FileIndexerConfig::canBeSearched(const QString& folder) const |
| 122 | { |
| 123 | QFileInfo fi(folder); |
| 124 | QString path = fi.absolutePath(); |
| 125 | if (!fi.isDir()) { |
| 126 | return false; |
| 127 | } else if (shouldFolderBeIndexed(path)) { |
| 128 | return true; |
| 129 | } |
| 130 | |
| 131 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
| 132 | |
| 133 | // Look for included descendants |
| 134 | for (const auto& entry : m_folderCache) { |
| 135 | if (entry.isIncluded && entry.path.startsWith(s: path)) { |
| 136 | return true; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | return false; |
| 141 | } |
| 142 | |
| 143 | bool FileIndexerConfig::shouldBeIndexed(const QString& path) const |
| 144 | { |
| 145 | QFileInfo fi(path); |
| 146 | if (fi.isDir()) { |
| 147 | return shouldFolderBeIndexed(path); |
| 148 | } else { |
| 149 | return (shouldFolderBeIndexed(path: fi.absolutePath()) && |
| 150 | (!fi.isHidden() || indexHiddenFilesAndFolders()) && |
| 151 | shouldFileBeIndexed(fileName: fi.fileName())); |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | bool FileIndexerConfig::shouldFolderBeIndexed(const QString& path) const |
| 156 | { |
| 157 | QString folder; |
| 158 | auto normalizedPath = normalizeTrailingSlashes(path: QString(path)); |
| 159 | |
| 160 | if (folderInFolderList(path: normalizedPath, folder)) { |
| 161 | // we always index the folders in the list |
| 162 | // ignoring the name filters |
| 163 | if (folder == normalizedPath) { |
| 164 | return true; |
| 165 | } |
| 166 | |
| 167 | // check the exclude filters for all components of the path |
| 168 | // after folder |
| 169 | #ifndef __unix__ |
| 170 | QDir d(folder); |
| 171 | #endif |
| 172 | |
| 173 | const QStringView trailingPath = QStringView(normalizedPath).mid(pos: folder.size()); |
| 174 | const auto pathComponents = trailingPath.split(sep: QLatin1Char('/'), behavior: Qt::SkipEmptyParts); |
| 175 | for (const auto &c : pathComponents) { |
| 176 | if (!shouldFileBeIndexed(fileName: c.toString())) { |
| 177 | return false; |
| 178 | } |
| 179 | #ifndef __unix__ |
| 180 | if (!indexHiddenFilesAndFolders() || |
| 181 | !d.cd(c.toString()) || QFileInfo(d.path()).isHidden()) { |
| 182 | return false; |
| 183 | } |
| 184 | #endif |
| 185 | } |
| 186 | return true; |
| 187 | } |
| 188 | |
| 189 | return false; |
| 190 | } |
| 191 | |
| 192 | bool FileIndexerConfig::shouldFileBeIndexed(const QString& fileName) const |
| 193 | { |
| 194 | if (!indexHiddenFilesAndFolders() && fileName.startsWith(c: QLatin1Char('.'))) { |
| 195 | return false; |
| 196 | } |
| 197 | return !m_excludeFilterRegExpCache.exactMatch(s: fileName); |
| 198 | } |
| 199 | |
| 200 | bool FileIndexerConfig::shouldMimeTypeBeIndexed(const QString& mimeType) const |
| 201 | { |
| 202 | return !m_excludeMimetypes.contains(value: mimeType); |
| 203 | } |
| 204 | |
| 205 | bool FileIndexerConfig::folderInFolderList(const QString& path, QString& folder) const |
| 206 | { |
| 207 | const_cast<FileIndexerConfig*>(this)->buildFolderCache(); |
| 208 | |
| 209 | const QString p = normalizeTrailingSlashes(path: QString(path)); |
| 210 | |
| 211 | for (const auto& entry : m_folderCache) { |
| 212 | const QString& f = entry.path; |
| 213 | if (p.startsWith(s: f)) { |
| 214 | folder = f; |
| 215 | return entry.isIncluded; |
| 216 | } |
| 217 | } |
| 218 | // path is not in the list, thus it should not be included |
| 219 | folder.clear(); |
| 220 | return false; |
| 221 | } |
| 222 | |
| 223 | void FileIndexerConfig::FolderCache::cleanup() |
| 224 | { |
| 225 | // TODO There are two cases where "redundant" includes |
| 226 | // should be kept: |
| 227 | // 1. when the "tail" matches a path exclude filter |
| 228 | // (m_excludeFilterRegexpCache) |
| 229 | // 2. when the explicitly adds a hidden directory, and |
| 230 | // we want to index hidden dirs (m_indexHidden) |
| 231 | bool keepAllIncluded = true; |
| 232 | |
| 233 | auto entry = begin(); |
| 234 | while (entry != end()) { |
| 235 | if ((*entry).isIncluded && keepAllIncluded) { |
| 236 | ++entry; |
| 237 | continue; |
| 238 | } |
| 239 | |
| 240 | const QString entryPath = (*entry).path; |
| 241 | auto start = entry; ++start; |
| 242 | auto parent = std::find_if(first: start, last: end(), |
| 243 | pred: [&entryPath](const FolderConfig& _parent) { |
| 244 | return entryPath.startsWith(s: _parent.path); |
| 245 | }); |
| 246 | |
| 247 | if (parent != end()) { |
| 248 | if ((*entry).isIncluded == (*parent).isIncluded) { |
| 249 | // remove identical config |
| 250 | entry = erase(position: entry); |
| 251 | } else { |
| 252 | ++entry; |
| 253 | } |
| 254 | } else { |
| 255 | if (!(*entry).isIncluded) { |
| 256 | // remove excluded a topmost level (default) |
| 257 | entry = erase(position: entry); |
| 258 | } else { |
| 259 | ++entry; |
| 260 | } |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | bool FileIndexerConfig::FolderConfig::operator<(const FolderConfig& other) const |
| 266 | { |
| 267 | return path.size() > other.path.size() || |
| 268 | (path.size() == other.path.size() && path < other.path); |
| 269 | } |
| 270 | |
| 271 | bool FileIndexerConfig::FolderCache::addFolderConfig(const FolderConfig& config) |
| 272 | { |
| 273 | if (config.path.isEmpty()) { |
| 274 | qCDebug(BALOO) << "Trying to add folder config entry with empty path" ; |
| 275 | return false; |
| 276 | } |
| 277 | auto newConfig{config}; |
| 278 | newConfig.path = QDir::cleanPath(path: config.path) + QLatin1Char('/'); |
| 279 | |
| 280 | auto it = std::lower_bound(first: cbegin(), last: cend(), val: newConfig); |
| 281 | if (it != cend() && (*it).path == newConfig.path) { |
| 282 | qCDebug(BALOO) << "Folder config entry for" << newConfig.path << "already exists" ; |
| 283 | return false; |
| 284 | } |
| 285 | |
| 286 | it = insert(position: it, x: newConfig); |
| 287 | return true; |
| 288 | } |
| 289 | |
| 290 | void FileIndexerConfig::buildFolderCache() |
| 291 | { |
| 292 | if (!m_folderCacheDirty) { |
| 293 | return; |
| 294 | } |
| 295 | |
| 296 | if (!m_devices) { |
| 297 | m_devices = new StorageDevices(this); |
| 298 | } |
| 299 | |
| 300 | FolderCache cache; |
| 301 | |
| 302 | const QStringList includeFolders = m_settings->folders(); |
| 303 | for (const auto& folder : includeFolders) { |
| 304 | if (!cache.addFolderConfig(config: {.path: folder, .isIncluded: true})) { |
| 305 | qCWarning(BALOO) << "Failed to add include folder config entry for" << folder; |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | const QStringList excludeFolders = m_settings->excludedFolders(); |
| 310 | for (const auto& folder : excludeFolders) { |
| 311 | if (!cache.addFolderConfig(config: {.path: folder, .isIncluded: false})) { |
| 312 | qCWarning(BALOO) << "Failed to add exclude folder config entry for" << folder; |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | // Add all removable media and network shares as ignored unless they have |
| 317 | // been explicitly added in the include list |
| 318 | const auto allMedia = m_devices->allMedia(); |
| 319 | for (const auto& device: allMedia) { |
| 320 | const QString mountPath = device.mountPath(); |
| 321 | if (!device.isUsable() && !mountPath.isEmpty()) { |
| 322 | if (!includeFolders.contains(str: mountPath)) { |
| 323 | cache.addFolderConfig(config: {.path: mountPath, .isIncluded: false}); |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | cache.cleanup(); |
| 329 | qCDebug(BALOO) << "Folder cache:" << cache; |
| 330 | m_folderCache = cache; |
| 331 | |
| 332 | m_folderCacheDirty = false; |
| 333 | } |
| 334 | |
| 335 | void FileIndexerConfig::buildExcludeFilterRegExpCache() |
| 336 | { |
| 337 | QStringList newFilters = excludeFilters(); |
| 338 | m_excludeFilterRegExpCache.rebuildCacheFromFilterList(filters: newFilters); |
| 339 | } |
| 340 | |
| 341 | void FileIndexerConfig::buildMimeTypeCache() |
| 342 | { |
| 343 | const QStringList excludedTypes = m_settings->excludedMimetypes(); |
| 344 | m_excludeMimetypes = QSet<QString>(excludedTypes.begin(), excludedTypes.end()); |
| 345 | } |
| 346 | |
| 347 | void FileIndexerConfig::forceConfigUpdate() |
| 348 | { |
| 349 | m_settings->load(); |
| 350 | |
| 351 | m_folderCacheDirty = true; |
| 352 | buildExcludeFilterRegExpCache(); |
| 353 | buildMimeTypeCache(); |
| 354 | |
| 355 | m_indexHidden = m_settings->indexHiddenFolders(); |
| 356 | m_onlyBasicIndexing = m_settings->onlyBasicIndexing(); |
| 357 | } |
| 358 | |
| 359 | int FileIndexerConfig::databaseVersion() const |
| 360 | { |
| 361 | return m_settings->dbVersion(); |
| 362 | } |
| 363 | |
| 364 | void FileIndexerConfig::setDatabaseVersion(int version) |
| 365 | { |
| 366 | m_settings->setDbVersion(version); |
| 367 | m_settings->save(); |
| 368 | } |
| 369 | |
| 370 | bool FileIndexerConfig::indexingEnabled() const |
| 371 | { |
| 372 | return m_settings->indexingEnabled(); |
| 373 | } |
| 374 | |
| 375 | uint FileIndexerConfig::maxUncomittedFiles() const |
| 376 | { |
| 377 | return m_maxUncomittedFiles; |
| 378 | } |
| 379 | |
| 380 | } // namespace Baloo |
| 381 | |
| 382 | #include "moc_fileindexerconfig.cpp" |
| 383 | |