1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qhelpsearchindexwriter_p.h" |
5 | #include "qhelp_global.h" |
6 | #include "qhelpdbreader_p.h" |
7 | #include "qhelpenginecore.h" |
8 | |
9 | #include <QtCore/qdatastream.h> |
10 | #include <QtCore/qdatetime.h> |
11 | #include <QtCore/qdir.h> |
12 | #include <QtCore/qstringconverter.h> |
13 | #include <QtCore/qtextstream.h> |
14 | #include <QtCore/qurl.h> |
15 | #include <QtCore/qvariant.h> |
16 | #include <QtGui/qtextdocument.h> |
17 | #include <QtSql/qsqldatabase.h> |
18 | #include <QtSql/qsqldriver.h> |
19 | #include <QtSql/qsqlerror.h> |
20 | #include <QtSql/qsqlquery.h> |
21 | |
22 | QT_BEGIN_NAMESPACE |
23 | |
24 | using namespace Qt::StringLiterals; |
25 | |
26 | namespace fulltextsearch { |
27 | |
// File name of the SQLite full text search database inside the index folder.
constexpr char FTS_DB_NAME[] = "fts";
29 | |
30 | class Writer |
31 | { |
32 | public: |
33 | Writer(const QString &path); |
34 | ~Writer(); |
35 | |
36 | bool tryInit(bool reindex); |
37 | void flush(); |
38 | |
39 | void removeNamespace(const QString &namespaceName); |
40 | bool hasNamespace(const QString &namespaceName); |
41 | void insertDoc(const QString &namespaceName, |
42 | const QString &attributes, |
43 | const QString &url, |
44 | const QString &title, |
45 | const QString &contents); |
46 | void startTransaction(); |
47 | void endTransaction(); |
48 | |
49 | private: |
50 | void init(bool reindex); |
51 | bool hasDB(); |
52 | void clearLegacyIndex(); |
53 | |
54 | const QString m_dbDir; |
55 | QString m_uniqueId; |
56 | |
57 | bool m_needOptimize = false; |
58 | QSqlDatabase m_db; |
59 | QVariantList m_namespaces; |
60 | QVariantList m_attributes; |
61 | QVariantList m_urls; |
62 | QVariantList m_titles; |
63 | QVariantList m_contents; |
64 | }; |
65 | |
66 | Writer::Writer(const QString &path) |
67 | : m_dbDir(path) |
68 | { |
69 | clearLegacyIndex(); |
70 | QDir().mkpath(dirPath: m_dbDir); |
71 | m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: "QHelpWriter"_L1 , pointer: this); |
72 | m_db = QSqlDatabase::addDatabase(type: "QSQLITE"_L1 , connectionName: m_uniqueId); |
73 | const QString dbPath = m_dbDir + u'/' + QLatin1StringView(FTS_DB_NAME); |
74 | m_db.setDatabaseName(dbPath); |
75 | if (!m_db.open()) { |
76 | const QString &error = QHelpSearchIndexWriter::tr( |
77 | s: "Cannot open database \"%1\" using connection \"%2\": %3" ) |
78 | .arg(args: dbPath, args&: m_uniqueId, args: m_db.lastError().text()); |
79 | qWarning(msg: "%s" , qUtf8Printable(error)); |
80 | m_db = {}; |
81 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
82 | m_uniqueId.clear(); |
83 | } else { |
84 | startTransaction(); |
85 | } |
86 | } |
87 | |
88 | bool Writer::tryInit(bool reindex) |
89 | { |
90 | if (!m_db.isValid()) |
91 | return true; |
92 | |
93 | QSqlQuery query(m_db); |
94 | // HACK: we try to perform any modifying command just to check if |
95 | // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver) |
96 | if (!query.exec(query: "CREATE TABLE foo ();"_L1 ) && query.lastError().nativeErrorCode() == "5"_L1 ) // db is locked |
97 | return false; |
98 | |
99 | // HACK: clear what we have created |
100 | query.exec(query: "DROP TABLE foo;"_L1 ); |
101 | |
102 | init(reindex); |
103 | return true; |
104 | } |
105 | |
106 | bool Writer::hasDB() |
107 | { |
108 | if (!m_db.isValid()) |
109 | return false; |
110 | |
111 | QSqlQuery query(m_db); |
112 | query.prepare(query: "SELECT id FROM info LIMIT 1"_L1 ); |
113 | query.exec(); |
114 | return query.next(); |
115 | } |
116 | |
117 | void Writer::clearLegacyIndex() |
118 | { |
119 | // Clear old legacy clucene index. |
120 | // More important in case of Creator, since |
121 | // the index folder is common for all Creator versions |
122 | QDir dir(m_dbDir); |
123 | if (!dir.exists()) |
124 | return; |
125 | |
126 | const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden); |
127 | if (!list.contains(str: QLatin1StringView(FTS_DB_NAME))) { |
128 | for (const QString &item : list) |
129 | dir.remove(fileName: item); |
130 | } |
131 | } |
132 | |
133 | void Writer::init(bool reindex) |
134 | { |
135 | if (!m_db.isValid()) |
136 | return; |
137 | |
138 | QSqlQuery query(m_db); |
139 | |
140 | if (reindex && hasDB()) { |
141 | m_needOptimize = true; |
142 | |
143 | query.exec(query: "DROP TABLE titles;"_L1 ); |
144 | query.exec(query: "DROP TABLE contents;"_L1 ); |
145 | query.exec(query: "DROP TABLE info;"_L1 ); |
146 | } |
147 | |
148 | query.exec(query: "CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"_L1 ); |
149 | |
150 | query.exec(query: "CREATE VIRTUAL TABLE titles USING fts5(" |
151 | "namespace UNINDEXED, attributes UNINDEXED, " |
152 | "url UNINDEXED, title, " |
153 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1 ); |
154 | query.exec(query: "CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN " |
155 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
156 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
157 | "END;"_L1 ); |
158 | query.exec(query: "CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN " |
159 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
160 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
161 | "END;"_L1 ); |
162 | query.exec(query: "CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN " |
163 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
164 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
165 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
166 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
167 | "END;"_L1 ); |
168 | |
169 | query.exec(query: "CREATE VIRTUAL TABLE contents USING fts5(" |
170 | "namespace UNINDEXED, attributes UNINDEXED, " |
171 | "url UNINDEXED, title, data, " |
172 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1 ); |
173 | query.exec(query: "CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN " |
174 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
175 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
176 | "END;"_L1 ); |
177 | query.exec(query: "CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN " |
178 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
179 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
180 | "END;"_L1 ); |
181 | query.exec(query: "CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN " |
182 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
183 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
184 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
185 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
186 | "END;"_L1 ); |
187 | } |
188 | |
189 | Writer::~Writer() |
190 | { |
191 | if (m_db.isValid()) |
192 | m_db.close(); |
193 | m_db = {}; |
194 | if (!m_uniqueId.isEmpty()) |
195 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
196 | } |
197 | |
198 | void Writer::flush() |
199 | { |
200 | if (!m_db.isValid()) |
201 | return; |
202 | |
203 | QSqlQuery query(m_db); |
204 | query.prepare(query: "INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"_L1 ); |
205 | query.addBindValue(val: m_namespaces); |
206 | query.addBindValue(val: m_attributes); |
207 | query.addBindValue(val: m_urls); |
208 | query.addBindValue(val: m_titles); |
209 | query.addBindValue(val: m_contents); |
210 | query.execBatch(); |
211 | |
212 | m_namespaces.clear(); |
213 | m_attributes.clear(); |
214 | m_urls.clear(); |
215 | m_titles.clear(); |
216 | m_contents.clear(); |
217 | } |
218 | |
219 | void Writer::removeNamespace(const QString &namespaceName) |
220 | { |
221 | if (!m_db.isValid() || !hasNamespace(namespaceName)) // no data to delete |
222 | return; |
223 | |
224 | m_needOptimize = true; |
225 | QSqlQuery query(m_db); |
226 | query.prepare(query: "DELETE FROM info WHERE namespace = ?"_L1 ); |
227 | query.addBindValue(val: namespaceName); |
228 | query.exec(); |
229 | } |
230 | |
231 | bool Writer::hasNamespace(const QString &namespaceName) |
232 | { |
233 | if (!m_db.isValid()) |
234 | return false; |
235 | |
236 | QSqlQuery query(m_db); |
237 | query.prepare(query: "SELECT id FROM info WHERE namespace = ? LIMIT 1"_L1 ); |
238 | query.addBindValue(val: namespaceName); |
239 | query.exec(); |
240 | return query.next(); |
241 | } |
242 | |
243 | void Writer::insertDoc(const QString &namespaceName, |
244 | const QString &attributes, |
245 | const QString &url, |
246 | const QString &title, |
247 | const QString &contents) |
248 | { |
249 | m_namespaces.append(t: namespaceName); |
250 | m_attributes.append(t: attributes); |
251 | m_urls.append(t: url); |
252 | m_titles.append(t: title); |
253 | m_contents.append(t: contents); |
254 | } |
255 | |
256 | void Writer::startTransaction() |
257 | { |
258 | if (!m_db.isValid()) |
259 | return; |
260 | |
261 | m_needOptimize = false; |
262 | if (m_db.driver()->hasFeature(f: QSqlDriver::Transactions)) |
263 | m_db.transaction(); |
264 | } |
265 | |
266 | void Writer::endTransaction() |
267 | { |
268 | if (!m_db.isValid()) |
269 | return; |
270 | |
271 | QSqlQuery query(m_db); |
272 | |
273 | if (m_needOptimize) { |
274 | query.exec(query: "INSERT INTO titles(titles) VALUES('rebuild')"_L1 ); |
275 | query.exec(query: "INSERT INTO contents(contents) VALUES('rebuild')"_L1 ); |
276 | } |
277 | |
278 | if (m_db.driver()->hasFeature(f: QSqlDriver::Transactions)) |
279 | m_db.commit(); |
280 | |
281 | if (m_needOptimize) |
282 | query.exec(query: "VACUUM"_L1 ); |
283 | } |
284 | |
285 | QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
286 | { |
287 | m_mutex.lock(); |
288 | this->m_cancel = true; |
289 | m_mutex.unlock(); |
290 | wait(); |
291 | } |
292 | |
293 | void QHelpSearchIndexWriter::cancelIndexing() |
294 | { |
295 | QMutexLocker lock(&m_mutex); |
296 | m_cancel = true; |
297 | } |
298 | |
299 | void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
300 | const QString &indexFilesFolder, bool reindex) |
301 | { |
302 | wait(); |
303 | QMutexLocker lock(&m_mutex); |
304 | |
305 | m_cancel = false; |
306 | m_reindex = reindex; |
307 | m_collectionFile = collectionFile; |
308 | m_indexFilesFolder = indexFilesFolder; |
309 | |
310 | lock.unlock(); |
311 | |
312 | start(QThread::LowestPriority); |
313 | } |
314 | |
// Custom value key under which the map of indexed namespaces is stored
// in the help collection.
static constexpr char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
316 | |
317 | static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine) |
318 | { |
319 | QMap<QString, QDateTime> indexMap; |
320 | QDataStream dataStream( |
321 | engine.customValue(key: QLatin1StringView(IndexedNamespacesKey)).toByteArray()); |
322 | dataStream >> indexMap; |
323 | return indexMap; |
324 | } |
325 | |
326 | static bool writeIndexMap(QHelpEngineCore *engine, const QMap<QString, QDateTime> &indexMap) |
327 | { |
328 | QByteArray data; |
329 | QDataStream dataStream(&data, QIODevice::ReadWrite); |
330 | dataStream << indexMap; |
331 | return engine->setCustomValue(key: QLatin1StringView(IndexedNamespacesKey), value: data); |
332 | } |
333 | |
334 | static bool clearIndexMap(QHelpEngineCore *engine) |
335 | { |
336 | return engine->removeCustomValue(key: QLatin1StringView(IndexedNamespacesKey)); |
337 | } |
338 | |
339 | void QHelpSearchIndexWriter::run() |
340 | { |
341 | QMutexLocker lock(&m_mutex); |
342 | |
343 | if (m_cancel) |
344 | return; |
345 | |
346 | const bool reindex(m_reindex); |
347 | const QString collectionFile(m_collectionFile); |
348 | const QString indexPath(m_indexFilesFolder); |
349 | |
350 | lock.unlock(); |
351 | |
352 | QHelpEngineCore engine(collectionFile, nullptr); |
353 | if (!engine.setupData()) |
354 | return; |
355 | |
356 | if (reindex) |
357 | clearIndexMap(engine: &engine); |
358 | |
359 | emit indexingStarted(); |
360 | |
361 | Writer writer(indexPath); |
362 | |
363 | while (!writer.tryInit(reindex)) |
364 | sleep(1); |
365 | |
366 | const QStringList ®isteredDocs = engine.registeredDocumentations(); |
367 | QMap<QString, QDateTime> indexMap = readIndexMap(engine); |
368 | |
369 | if (!reindex) { |
370 | for (const QString &namespaceName : registeredDocs) { |
371 | const auto it = indexMap.constFind(key: namespaceName); |
372 | if (it != indexMap.constEnd()) { |
373 | const QString path = engine.documentationFileName(namespaceName); |
374 | if (*it < QFileInfo(path).lastModified()) { |
375 | // Remove some outdated indexed stuff |
376 | indexMap.erase(it); |
377 | writer.removeNamespace(namespaceName); |
378 | } else if (!writer.hasNamespace(namespaceName)) { |
379 | // No data in fts db for namespace. |
380 | // The namespace could have been removed from fts db |
381 | // or the whole fts db have been removed |
382 | // without removing it from indexMap. |
383 | indexMap.erase(it); |
384 | } |
385 | } else { |
386 | // Needed in case namespaceName was removed from indexMap |
387 | // without removing it from fts db. |
388 | // May happen when e.g. qch file was removed manually |
389 | // without removing fts db. |
390 | writer.removeNamespace(namespaceName); |
391 | } |
392 | // TODO: we may also detect if there are any other data |
393 | // and remove it |
394 | } |
395 | } else { |
396 | indexMap.clear(); |
397 | } |
398 | |
399 | auto it = indexMap.begin(); |
400 | while (it != indexMap.end()) { |
401 | if (!registeredDocs.contains(str: it.key())) { |
402 | writer.removeNamespace(namespaceName: it.key()); |
403 | it = indexMap.erase(it); |
404 | } else { |
405 | ++it; |
406 | } |
407 | } |
408 | |
409 | for (const QString &namespaceName : registeredDocs) { |
410 | lock.relock(); |
411 | if (m_cancel) { |
412 | // store what we have done so far |
413 | writeIndexMap(engine: &engine, indexMap); |
414 | writer.endTransaction(); |
415 | emit indexingFinished(); |
416 | return; |
417 | } |
418 | lock.unlock(); |
419 | |
420 | // if indexed, continue |
421 | if (indexMap.contains(key: namespaceName)) |
422 | continue; |
423 | |
424 | const QString fileName = engine.documentationFileName(namespaceName); |
425 | QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName( |
426 | name: fileName, pointer: this), nullptr); |
427 | if (!reader.init()) |
428 | continue; |
429 | |
430 | const QString virtualFolder = reader.virtualFolder(); |
431 | |
432 | const QList<QStringList> &attributeSets = |
433 | engine.filterAttributeSets(namespaceName); |
434 | |
435 | for (const QStringList &attributes : attributeSets) { |
436 | const QString &attributesString = attributes.join(sep: u'|'); |
437 | |
438 | const auto htmlFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "html"_L1 ); |
439 | const auto htmFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "htm"_L1 ); |
440 | const auto txtFiles = reader.filesData(filterAttributes: attributes, extensionFilter: "txt"_L1 ); |
441 | |
442 | auto files = htmlFiles; |
443 | files.unite(other: htmFiles); |
444 | files.unite(other: txtFiles); |
445 | |
446 | for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) { |
447 | lock.relock(); |
448 | if (m_cancel) { |
449 | // store what we have done so far |
450 | writeIndexMap(engine: &engine, indexMap); |
451 | writer.endTransaction(); |
452 | emit indexingFinished(); |
453 | return; |
454 | } |
455 | lock.unlock(); |
456 | |
457 | const QString &file = it.key(); |
458 | const QByteArray &data = it.value(); |
459 | |
460 | if (data.isEmpty()) |
461 | continue; |
462 | |
463 | QUrl url; |
464 | url.setScheme("qthelp"_L1 ); |
465 | url.setAuthority(authority: namespaceName); |
466 | url.setPath(path: u'/' + virtualFolder + u'/' + file); |
467 | |
468 | if (url.hasFragment()) |
469 | url.setFragment(fragment: {}); |
470 | |
471 | const QString &fullFileName = url.toString(); |
472 | if (!fullFileName.endsWith(s: ".html"_L1 ) && !fullFileName.endsWith(s: ".htm"_L1 ) |
473 | && !fullFileName.endsWith(s: ".txt"_L1 )) { |
474 | continue; |
475 | } |
476 | |
477 | QTextStream s(data); |
478 | auto encoding = QStringDecoder::encodingForHtml(data); |
479 | if (encoding) |
480 | s.setEncoding(*encoding); |
481 | |
482 | const QString &text = s.readAll(); |
483 | if (text.isEmpty()) |
484 | continue; |
485 | |
486 | QString title; |
487 | QString contents; |
488 | if (fullFileName.endsWith(s: ".txt"_L1 )) { |
489 | title = fullFileName.mid(position: fullFileName.lastIndexOf(c: u'/') + 1); |
490 | contents = text.toHtmlEscaped(); |
491 | } else { |
492 | QTextDocument doc; |
493 | doc.setHtml(text); |
494 | |
495 | title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped(); |
496 | contents = doc.toPlainText().toHtmlEscaped(); |
497 | } |
498 | |
499 | writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents); |
500 | } |
501 | } |
502 | writer.flush(); |
503 | const QString &path = engine.documentationFileName(namespaceName); |
504 | indexMap.insert(key: namespaceName, value: QFileInfo(path).lastModified()); |
505 | } |
506 | |
507 | writeIndexMap(engine: &engine, indexMap); |
508 | |
509 | writer.endTransaction(); |
510 | emit indexingFinished(); |
511 | } |
512 | |
513 | } // namespace fulltextsearch |
514 | |
515 | QT_END_NAMESPACE |
516 | |