| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qhelpsearchindexwriter_p.h" |
| 5 | #include "qhelp_global.h" |
| 6 | #include "qhelpdbreader_p.h" |
| 7 | #include "qhelpenginecore.h" |
| 8 | |
| 9 | #include <QtTools/qttools-config.h> |
| 10 | #include <QtCore/qdatastream.h> |
| 11 | #include <QtCore/qdatetime.h> |
| 12 | #include <QtCore/qdir.h> |
| 13 | #include <QtCore/qstringconverter.h> |
| 14 | #include <QtCore/qtextstream.h> |
| 15 | #include <QtCore/qurl.h> |
| 16 | #include <QtCore/qvariant.h> |
| 17 | #if QT_CONFIG(fullqthelp) |
| 18 | # include <QtGui/qtextdocument.h> |
| 19 | #endif |
| 20 | #include <QtSql/qsqldatabase.h> |
| 21 | #include <QtSql/qsqldriver.h> |
| 22 | #include <QtSql/qsqlerror.h> |
| 23 | #include <QtSql/qsqlquery.h> |
| 24 | |
| 25 | QT_BEGIN_NAMESPACE |
| 26 | |
| 27 | using namespace Qt::StringLiterals; |
| 28 | |
| 29 | namespace fulltextsearch { |
| 30 | |
// File name of the SQLite full text search database inside the index folder.
// Its presence is also used to tell a current index folder from a legacy one.
const char FTS_DB_NAME[] = "fts";
| 32 | |
| 33 | class Writer |
| 34 | { |
| 35 | public: |
| 36 | Writer(const QString &path); |
| 37 | ~Writer(); |
| 38 | |
| 39 | bool tryInit(bool reindex); |
| 40 | void flush(); |
| 41 | |
| 42 | void removeNamespace(const QString &namespaceName); |
| 43 | bool hasNamespace(const QString &namespaceName); |
| 44 | void insertDoc(const QString &namespaceName, |
| 45 | const QString &attributes, |
| 46 | const QString &url, |
| 47 | const QString &title, |
| 48 | const QString &contents); |
| 49 | void startTransaction(); |
| 50 | void endTransaction(); |
| 51 | |
| 52 | private: |
| 53 | void init(bool reindex); |
| 54 | bool hasDB(); |
| 55 | void clearLegacyIndex(); |
| 56 | |
| 57 | const QString m_dbDir; |
| 58 | QString m_uniqueId; |
| 59 | |
| 60 | bool m_needOptimize = false; |
| 61 | QSqlDatabase m_db; |
| 62 | QVariantList m_namespaces; |
| 63 | QVariantList m_attributes; |
| 64 | QVariantList m_urls; |
| 65 | QVariantList m_titles; |
| 66 | QVariantList m_contents; |
| 67 | }; |
| 68 | |
| 69 | Writer::Writer(const QString &path) |
| 70 | : m_dbDir(path) |
| 71 | { |
| 72 | clearLegacyIndex(); |
| 73 | QDir().mkpath(dirPath: m_dbDir); |
| 74 | m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: "QHelpWriter"_L1 , pointer: this); |
| 75 | m_db = QSqlDatabase::addDatabase(type: "QSQLITE"_L1 , connectionName: m_uniqueId); |
| 76 | const QString dbPath = m_dbDir + u'/' + QLatin1StringView(FTS_DB_NAME); |
| 77 | m_db.setDatabaseName(dbPath); |
| 78 | if (!m_db.open()) { |
| 79 | const QString &error = QHelpSearchIndexWriter::tr( |
| 80 | s: "Cannot open database \"%1\" using connection \"%2\": %3" ) |
| 81 | .arg(args: dbPath, args&: m_uniqueId, args: m_db.lastError().text()); |
| 82 | qWarning(msg: "%s" , qUtf8Printable(error)); |
| 83 | m_db = {}; |
| 84 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
| 85 | m_uniqueId.clear(); |
| 86 | } else { |
| 87 | startTransaction(); |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | bool Writer::tryInit(bool reindex) |
| 92 | { |
| 93 | if (!m_db.isValid()) |
| 94 | return true; |
| 95 | |
| 96 | QSqlQuery query(m_db); |
| 97 | // HACK: we try to perform any modifying command just to check if |
| 98 | // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver) |
| 99 | if (!query.exec(query: "CREATE TABLE foo ();"_L1 ) && query.lastError().nativeErrorCode() == "5"_L1 ) // db is locked |
| 100 | return false; |
| 101 | |
| 102 | // HACK: clear what we have created |
| 103 | query.exec(query: "DROP TABLE foo;"_L1 ); |
| 104 | |
| 105 | init(reindex); |
| 106 | return true; |
| 107 | } |
| 108 | |
| 109 | bool Writer::hasDB() |
| 110 | { |
| 111 | if (!m_db.isValid()) |
| 112 | return false; |
| 113 | |
| 114 | QSqlQuery query(m_db); |
| 115 | query.prepare(query: "SELECT id FROM info LIMIT 1"_L1 ); |
| 116 | query.exec(); |
| 117 | return query.next(); |
| 118 | } |
| 119 | |
| 120 | void Writer::clearLegacyIndex() |
| 121 | { |
| 122 | // Clear old legacy clucene index. |
| 123 | // More important in case of Creator, since |
| 124 | // the index folder is common for all Creator versions |
| 125 | QDir dir(m_dbDir); |
| 126 | if (!dir.exists()) |
| 127 | return; |
| 128 | |
| 129 | const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden); |
| 130 | if (!list.contains(str: QLatin1StringView(FTS_DB_NAME))) { |
| 131 | for (const QString &item : list) |
| 132 | dir.remove(fileName: item); |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | void Writer::init(bool reindex) |
| 137 | { |
| 138 | if (!m_db.isValid()) |
| 139 | return; |
| 140 | |
| 141 | QSqlQuery query(m_db); |
| 142 | |
| 143 | if (reindex && hasDB()) { |
| 144 | m_needOptimize = true; |
| 145 | |
| 146 | query.exec(query: "DROP TABLE titles;"_L1 ); |
| 147 | query.exec(query: "DROP TABLE contents;"_L1 ); |
| 148 | query.exec(query: "DROP TABLE info;"_L1 ); |
| 149 | } |
| 150 | |
| 151 | query.exec(query: "CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"_L1 ); |
| 152 | |
| 153 | query.exec(query: "CREATE VIRTUAL TABLE titles USING fts5(" |
| 154 | "namespace UNINDEXED, attributes UNINDEXED, " |
| 155 | "url UNINDEXED, title, " |
| 156 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1 ); |
| 157 | query.exec(query: "CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN " |
| 158 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
| 159 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
| 160 | "END;"_L1 ); |
| 161 | query.exec(query: "CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN " |
| 162 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
| 163 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
| 164 | "END;"_L1 ); |
| 165 | query.exec(query: "CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN " |
| 166 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
| 167 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
| 168 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
| 169 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
| 170 | "END;"_L1 ); |
| 171 | |
| 172 | query.exec(query: "CREATE VIRTUAL TABLE contents USING fts5(" |
| 173 | "namespace UNINDEXED, attributes UNINDEXED, " |
| 174 | "url UNINDEXED, title, data, " |
| 175 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1 ); |
| 176 | query.exec(query: "CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN " |
| 177 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
| 178 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
| 179 | "END;"_L1 ); |
| 180 | query.exec(query: "CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN " |
| 181 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
| 182 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
| 183 | "END;"_L1 ); |
| 184 | query.exec(query: "CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN " |
| 185 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
| 186 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
| 187 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
| 188 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
| 189 | "END;"_L1 ); |
| 190 | } |
| 191 | |
| 192 | Writer::~Writer() |
| 193 | { |
| 194 | if (m_db.isValid()) |
| 195 | m_db.close(); |
| 196 | m_db = {}; |
| 197 | if (!m_uniqueId.isEmpty()) |
| 198 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
| 199 | } |
| 200 | |
| 201 | void Writer::flush() |
| 202 | { |
| 203 | if (!m_db.isValid()) |
| 204 | return; |
| 205 | |
| 206 | QSqlQuery query(m_db); |
| 207 | query.prepare(query: "INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"_L1 ); |
| 208 | query.addBindValue(val: m_namespaces); |
| 209 | query.addBindValue(val: m_attributes); |
| 210 | query.addBindValue(val: m_urls); |
| 211 | query.addBindValue(val: m_titles); |
| 212 | query.addBindValue(val: m_contents); |
| 213 | query.execBatch(); |
| 214 | |
| 215 | m_namespaces.clear(); |
| 216 | m_attributes.clear(); |
| 217 | m_urls.clear(); |
| 218 | m_titles.clear(); |
| 219 | m_contents.clear(); |
| 220 | } |
| 221 | |
| 222 | void Writer::removeNamespace(const QString &namespaceName) |
| 223 | { |
| 224 | if (!m_db.isValid() || !hasNamespace(namespaceName)) // no data to delete |
| 225 | return; |
| 226 | |
| 227 | m_needOptimize = true; |
| 228 | QSqlQuery query(m_db); |
| 229 | query.prepare(query: "DELETE FROM info WHERE namespace = ?"_L1 ); |
| 230 | query.addBindValue(val: namespaceName); |
| 231 | query.exec(); |
| 232 | } |
| 233 | |
| 234 | bool Writer::hasNamespace(const QString &namespaceName) |
| 235 | { |
| 236 | if (!m_db.isValid()) |
| 237 | return false; |
| 238 | |
| 239 | QSqlQuery query(m_db); |
| 240 | query.prepare(query: "SELECT id FROM info WHERE namespace = ? LIMIT 1"_L1 ); |
| 241 | query.addBindValue(val: namespaceName); |
| 242 | query.exec(); |
| 243 | return query.next(); |
| 244 | } |
| 245 | |
| 246 | void Writer::insertDoc(const QString &namespaceName, |
| 247 | const QString &attributes, |
| 248 | const QString &url, |
| 249 | const QString &title, |
| 250 | const QString &contents) |
| 251 | { |
| 252 | m_namespaces.append(t: namespaceName); |
| 253 | m_attributes.append(t: attributes); |
| 254 | m_urls.append(t: url); |
| 255 | m_titles.append(t: title); |
| 256 | m_contents.append(t: contents); |
| 257 | } |
| 258 | |
| 259 | void Writer::startTransaction() |
| 260 | { |
| 261 | if (!m_db.isValid()) |
| 262 | return; |
| 263 | |
| 264 | m_needOptimize = false; |
| 265 | if (m_db.driver()->hasFeature(f: QSqlDriver::Transactions)) |
| 266 | m_db.transaction(); |
| 267 | } |
| 268 | |
| 269 | void Writer::endTransaction() |
| 270 | { |
| 271 | if (!m_db.isValid()) |
| 272 | return; |
| 273 | |
| 274 | QSqlQuery query(m_db); |
| 275 | |
| 276 | if (m_needOptimize) { |
| 277 | query.exec(query: "INSERT INTO titles(titles) VALUES('rebuild')"_L1 ); |
| 278 | query.exec(query: "INSERT INTO contents(contents) VALUES('rebuild')"_L1 ); |
| 279 | } |
| 280 | |
| 281 | if (m_db.driver()->hasFeature(f: QSqlDriver::Transactions)) |
| 282 | m_db.commit(); |
| 283 | |
| 284 | if (m_needOptimize) |
| 285 | query.exec(query: "VACUUM"_L1 ); |
| 286 | } |
| 287 | |
| 288 | QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
| 289 | { |
| 290 | m_mutex.lock(); |
| 291 | this->m_cancel = true; |
| 292 | m_mutex.unlock(); |
| 293 | wait(); |
| 294 | } |
| 295 | |
| 296 | void QHelpSearchIndexWriter::cancelIndexing() |
| 297 | { |
| 298 | QMutexLocker lock(&m_mutex); |
| 299 | m_cancel = true; |
| 300 | } |
| 301 | |
| 302 | void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
| 303 | const QString &indexFilesFolder, bool reindex) |
| 304 | { |
| 305 | wait(); |
| 306 | QMutexLocker lock(&m_mutex); |
| 307 | |
| 308 | m_cancel = false; |
| 309 | m_reindex = reindex; |
| 310 | m_collectionFile = collectionFile; |
| 311 | m_indexFilesFolder = indexFilesFolder; |
| 312 | |
| 313 | lock.unlock(); |
| 314 | |
| 315 | start(QThread::LowestPriority); |
| 316 | } |
| 317 | |
// Key of the custom value in the help collection under which the serialized
// map of indexed namespaces (namespace name -> time stamp) is stored.
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
| 319 | |
| 320 | static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine) |
| 321 | { |
| 322 | QMap<QString, QDateTime> indexMap; |
| 323 | QDataStream dataStream( |
| 324 | engine.customValue(key: QLatin1StringView(IndexedNamespacesKey)).toByteArray()); |
| 325 | dataStream >> indexMap; |
| 326 | return indexMap; |
| 327 | } |
| 328 | |
| 329 | static bool writeIndexMap(QHelpEngineCore *engine, const QMap<QString, QDateTime> &indexMap) |
| 330 | { |
| 331 | QByteArray data; |
| 332 | QDataStream dataStream(&data, QIODevice::ReadWrite); |
| 333 | dataStream << indexMap; |
| 334 | return engine->setCustomValue(key: QLatin1StringView(IndexedNamespacesKey), value: data); |
| 335 | } |
| 336 | |
| 337 | static bool clearIndexMap(QHelpEngineCore *engine) |
| 338 | { |
| 339 | return engine->removeCustomValue(key: QLatin1StringView(IndexedNamespacesKey)); |
| 340 | } |
| 341 | |
| 342 | void QHelpSearchIndexWriter::run() |
| 343 | { |
| 344 | QMutexLocker lock(&m_mutex); |
| 345 | |
| 346 | if (m_cancel) |
| 347 | return; |
| 348 | |
| 349 | const bool reindex(m_reindex); |
| 350 | const QString collectionFile(m_collectionFile); |
| 351 | const QString indexPath(m_indexFilesFolder); |
| 352 | |
| 353 | lock.unlock(); |
| 354 | |
| 355 | QHelpEngineCore engine(collectionFile, nullptr); |
| 356 | if (!engine.setupData()) |
| 357 | return; |
| 358 | |
| 359 | if (reindex) |
| 360 | clearIndexMap(engine: &engine); |
| 361 | |
| 362 | emit indexingStarted(); |
| 363 | |
| 364 | Writer writer(indexPath); |
| 365 | |
| 366 | while (!writer.tryInit(reindex)) |
| 367 | sleep(1); |
| 368 | |
| 369 | const QStringList ®isteredDocs = engine.registeredDocumentations(); |
| 370 | QMap<QString, QDateTime> indexMap = readIndexMap(engine); |
| 371 | |
| 372 | if (!reindex) { |
| 373 | for (const QString &namespaceName : registeredDocs) { |
| 374 | const auto it = indexMap.constFind(key: namespaceName); |
| 375 | if (it != indexMap.constEnd()) { |
| 376 | const QString path = engine.documentationFileName(namespaceName); |
| 377 | if (*it < QFileInfo(path).lastModified()) { |
| 378 | // Remove some outdated indexed stuff |
| 379 | indexMap.erase(it); |
| 380 | writer.removeNamespace(namespaceName); |
| 381 | } else if (!writer.hasNamespace(namespaceName)) { |
| 382 | // No data in fts db for namespace. |
| 383 | // The namespace could have been removed from fts db |
| 384 | // or the whole fts db have been removed |
| 385 | // without removing it from indexMap. |
| 386 | indexMap.erase(it); |
| 387 | } |
| 388 | } else { |
| 389 | // Needed in case namespaceName was removed from indexMap |
| 390 | // without removing it from fts db. |
| 391 | // May happen when e.g. qch file was removed manually |
| 392 | // without removing fts db. |
| 393 | writer.removeNamespace(namespaceName); |
| 394 | } |
| 395 | // TODO: we may also detect if there are any other data |
| 396 | // and remove it |
| 397 | } |
| 398 | } else { |
| 399 | indexMap.clear(); |
| 400 | } |
| 401 | |
| 402 | auto it = indexMap.begin(); |
| 403 | while (it != indexMap.end()) { |
| 404 | if (!registeredDocs.contains(str: it.key())) { |
| 405 | writer.removeNamespace(namespaceName: it.key()); |
| 406 | it = indexMap.erase(it); |
| 407 | } else { |
| 408 | ++it; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | for (const QString &namespaceName : registeredDocs) { |
| 413 | lock.relock(); |
| 414 | if (m_cancel) { |
| 415 | // store what we have done so far |
| 416 | writeIndexMap(engine: &engine, indexMap); |
| 417 | writer.endTransaction(); |
| 418 | emit indexingFinished(); |
| 419 | return; |
| 420 | } |
| 421 | lock.unlock(); |
| 422 | |
| 423 | // if indexed, continue |
| 424 | if (indexMap.contains(key: namespaceName)) |
| 425 | continue; |
| 426 | |
| 427 | const QString fileName = engine.documentationFileName(namespaceName); |
| 428 | QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName( |
| 429 | name: fileName, pointer: this), nullptr); |
| 430 | if (!reader.init()) |
| 431 | continue; |
| 432 | |
| 433 | const QString virtualFolder = reader.virtualFolder(); |
| 434 | |
| 435 | const QList<QStringList> &attributeSets = |
| 436 | engine.filterAttributeSets(namespaceName); |
| 437 | |
| 438 | for (const QStringList &attributes : attributeSets) { |
| 439 | const QString &attributesString = attributes.join(sep: u'|'); |
| 440 | |
| 441 | const auto htmlFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "html"_L1 ); |
| 442 | const auto htmFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "htm"_L1 ); |
| 443 | const auto txtFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "txt"_L1 ); |
| 444 | |
| 445 | auto files = htmlFiles; |
| 446 | files.unite(other: htmFiles); |
| 447 | files.unite(other: txtFiles); |
| 448 | |
| 449 | for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) { |
| 450 | lock.relock(); |
| 451 | if (m_cancel) { |
| 452 | // store what we have done so far |
| 453 | writeIndexMap(engine: &engine, indexMap); |
| 454 | writer.endTransaction(); |
| 455 | emit indexingFinished(); |
| 456 | return; |
| 457 | } |
| 458 | lock.unlock(); |
| 459 | |
| 460 | const QString &file = it.key(); |
| 461 | const QByteArray &data = it.value(); |
| 462 | |
| 463 | if (data.isEmpty()) |
| 464 | continue; |
| 465 | |
| 466 | QUrl url; |
| 467 | url.setScheme("qthelp"_L1 ); |
| 468 | url.setAuthority(authority: namespaceName); |
| 469 | url.setPath(path: u'/' + virtualFolder + u'/' + file); |
| 470 | |
| 471 | if (url.hasFragment()) |
| 472 | url.setFragment(fragment: {}); |
| 473 | |
| 474 | const QString &fullFileName = url.toString(); |
| 475 | if (!fullFileName.endsWith(s: ".html"_L1 ) && !fullFileName.endsWith(s: ".htm"_L1 ) |
| 476 | && !fullFileName.endsWith(s: ".txt"_L1 )) { |
| 477 | continue; |
| 478 | } |
| 479 | |
| 480 | QTextStream s(data); |
| 481 | auto encoding = QStringDecoder::encodingForHtml(data); |
| 482 | if (encoding) |
| 483 | s.setEncoding(*encoding); |
| 484 | |
| 485 | const QString &text = s.readAll(); |
| 486 | if (text.isEmpty()) |
| 487 | continue; |
| 488 | |
| 489 | QString title; |
| 490 | QString contents; |
| 491 | if (fullFileName.endsWith(s: ".txt"_L1 )) { |
| 492 | title = fullFileName.mid(position: fullFileName.lastIndexOf(c: u'/') + 1); |
| 493 | contents = text.toHtmlEscaped(); |
| 494 | #if QT_CONFIG(fullqthelp) |
| 495 | } else { |
| 496 | QTextDocument doc; |
| 497 | doc.setHtml(text); |
| 498 | |
| 499 | title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped(); |
| 500 | contents = doc.toPlainText().toHtmlEscaped(); |
| 501 | #endif |
| 502 | } |
| 503 | |
| 504 | writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents); |
| 505 | } |
| 506 | } |
| 507 | writer.flush(); |
| 508 | const QString &path = engine.documentationFileName(namespaceName); |
| 509 | indexMap.insert(key: namespaceName, value: QFileInfo(path).lastModified()); |
| 510 | } |
| 511 | |
| 512 | writeIndexMap(engine: &engine, indexMap); |
| 513 | |
| 514 | writer.endTransaction(); |
| 515 | emit indexingFinished(); |
| 516 | } |
| 517 | |
| 518 | } // namespace fulltextsearch |
| 519 | |
| 520 | QT_END_NAMESPACE |
| 521 | |