1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qhelpsearchindexwriter_default_p.h" |
5 | #include "qhelp_global.h" |
6 | #include "qhelpenginecore.h" |
7 | #include "qhelpdbreader_p.h" |
8 | |
9 | #include <QtCore/QDataStream> |
10 | #include <QtCore/QDateTime> |
11 | #include <QtCore/QDir> |
12 | #include <QtCore/QStringDecoder> |
13 | #include <QtCore/QTextStream> |
14 | #include <QtCore/QSet> |
15 | #include <QtCore/QUrl> |
16 | #include <QtCore/QVariant> |
17 | #include <QtSql/QSqlDatabase> |
18 | #include <QtSql/QSqlDriver> |
19 | #include <QtSql/QSqlError> |
20 | #include <QtSql/QSqlQuery> |
21 | |
22 | #include <QTextDocument> |
23 | |
24 | QT_BEGIN_NAMESPACE |
25 | |
26 | namespace fulltextsearch { |
27 | namespace qt { |
28 | |
// File name of the SQLite full-text-search database inside the index directory.
const char FTS_DB_NAME[] = "fts";
30 | |
31 | Writer::Writer(const QString &path) |
32 | : m_dbDir(path) |
33 | { |
34 | clearLegacyIndex(); |
35 | QDir().mkpath(dirPath: m_dbDir); |
36 | m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: QLatin1String("QHelpWriter" ), pointer: this); |
37 | m_db = new QSqlDatabase(); |
38 | *m_db = QSqlDatabase::addDatabase(type: QLatin1String("QSQLITE" ), connectionName: m_uniqueId); |
39 | const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME); |
40 | m_db->setDatabaseName(dbPath); |
41 | if (!m_db->open()) { |
42 | const QString &error = QHelpSearchIndexWriter::tr(s: "Cannot open database \"%1\" using connection \"%2\": %3" ) |
43 | .arg(args: dbPath, args&: m_uniqueId, args: m_db->lastError().text()); |
44 | qWarning(msg: "%s" , qUtf8Printable(error)); |
45 | delete m_db; |
46 | m_db = nullptr; |
47 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
48 | m_uniqueId = QString(); |
49 | } else { |
50 | startTransaction(); |
51 | } |
52 | } |
53 | |
54 | bool Writer::tryInit(bool reindex) |
55 | { |
56 | if (!m_db) |
57 | return true; |
58 | |
59 | QSqlQuery query(*m_db); |
60 | // HACK: we try to perform any modifying command just to check if |
61 | // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver) |
62 | if (!query.exec(query: QLatin1String("CREATE TABLE foo ();" ))) { |
63 | if (query.lastError().nativeErrorCode() == QLatin1String("5" )) // db is locked |
64 | return false; |
65 | } |
66 | // HACK: clear what we have created |
67 | query.exec(query: QLatin1String("DROP TABLE foo;" )); |
68 | |
69 | init(reindex); |
70 | return true; |
71 | } |
72 | |
73 | bool Writer::hasDB() |
74 | { |
75 | if (!m_db) |
76 | return false; |
77 | |
78 | QSqlQuery query(*m_db); |
79 | |
80 | query.prepare(query: QLatin1String("SELECT id FROM info LIMIT 1" )); |
81 | query.exec(); |
82 | |
83 | return query.next(); |
84 | } |
85 | |
86 | void Writer::clearLegacyIndex() |
87 | { |
88 | // Clear old legacy clucene index. |
89 | // More important in case of Creator, since |
90 | // the index folder is common for all Creator versions |
91 | QDir dir(m_dbDir); |
92 | if (!dir.exists()) |
93 | return; |
94 | |
95 | const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden); |
96 | if (!list.contains(str: QLatin1String(FTS_DB_NAME))) { |
97 | for (const QString &item : list) |
98 | dir.remove(fileName: item); |
99 | } |
100 | } |
101 | |
102 | void Writer::init(bool reindex) |
103 | { |
104 | if (!m_db) |
105 | return; |
106 | |
107 | QSqlQuery query(*m_db); |
108 | |
109 | if (reindex && hasDB()) { |
110 | m_needOptimize = true; |
111 | |
112 | query.exec(query: QLatin1String("DROP TABLE titles;" )); |
113 | query.exec(query: QLatin1String("DROP TABLE contents;" )); |
114 | query.exec(query: QLatin1String("DROP TABLE info;" )); |
115 | } |
116 | |
117 | query.exec(query: QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);" )); |
118 | |
119 | query.exec(query: QLatin1String("CREATE VIRTUAL TABLE titles USING fts5(" |
120 | "namespace UNINDEXED, attributes UNINDEXED, " |
121 | "url UNINDEXED, title, " |
122 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');" )); |
123 | query.exec(query: QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN " |
124 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
125 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
126 | "END;" )); |
127 | query.exec(query: QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN " |
128 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
129 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
130 | "END;" )); |
131 | query.exec(query: QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN " |
132 | "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
133 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
134 | "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
135 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
136 | "END;" )); |
137 | |
138 | query.exec(query: QLatin1String("CREATE VIRTUAL TABLE contents USING fts5(" |
139 | "namespace UNINDEXED, attributes UNINDEXED, " |
140 | "url UNINDEXED, title, data, " |
141 | "tokenize = 'porter unicode61', content = 'info', content_rowid='id');" )); |
142 | query.exec(query: QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN " |
143 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
144 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
145 | "END;" )); |
146 | query.exec(query: QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN " |
147 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
148 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
149 | "END;" )); |
150 | query.exec(query: QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN " |
151 | "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
152 | "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
153 | "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
154 | "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
155 | "END;" )); |
156 | } |
157 | |
158 | Writer::~Writer() |
159 | { |
160 | if (m_db) { |
161 | m_db->close(); |
162 | delete m_db; |
163 | } |
164 | |
165 | if (!m_uniqueId.isEmpty()) |
166 | QSqlDatabase::removeDatabase(connectionName: m_uniqueId); |
167 | } |
168 | |
169 | void Writer::flush() |
170 | { |
171 | if (!m_db) |
172 | return; |
173 | |
174 | QSqlQuery query(*m_db); |
175 | |
176 | query.prepare(query: QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)" )); |
177 | query.addBindValue(val: m_namespaces); |
178 | query.addBindValue(val: m_attributes); |
179 | query.addBindValue(val: m_urls); |
180 | query.addBindValue(val: m_titles); |
181 | query.addBindValue(val: m_contents); |
182 | query.execBatch(); |
183 | |
184 | m_namespaces = QVariantList(); |
185 | m_attributes = QVariantList(); |
186 | m_urls = QVariantList(); |
187 | m_titles = QVariantList(); |
188 | m_contents = QVariantList(); |
189 | } |
190 | |
191 | void Writer::removeNamespace(const QString &namespaceName) |
192 | { |
193 | if (!m_db) |
194 | return; |
195 | |
196 | if (!hasNamespace(namespaceName)) |
197 | return; // no data to delete |
198 | |
199 | m_needOptimize = true; |
200 | |
201 | QSqlQuery query(*m_db); |
202 | |
203 | query.prepare(query: QLatin1String("DELETE FROM info WHERE namespace = ?" )); |
204 | query.addBindValue(val: namespaceName); |
205 | query.exec(); |
206 | } |
207 | |
208 | bool Writer::hasNamespace(const QString &namespaceName) |
209 | { |
210 | if (!m_db) |
211 | return false; |
212 | |
213 | QSqlQuery query(*m_db); |
214 | |
215 | query.prepare(query: QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1" )); |
216 | query.addBindValue(val: namespaceName); |
217 | query.exec(); |
218 | |
219 | return query.next(); |
220 | } |
221 | |
222 | void Writer::insertDoc(const QString &namespaceName, |
223 | const QString &attributes, |
224 | const QString &url, |
225 | const QString &title, |
226 | const QString &contents) |
227 | { |
228 | m_namespaces.append(t: namespaceName); |
229 | m_attributes.append(t: attributes); |
230 | m_urls.append(t: url); |
231 | m_titles.append(t: title); |
232 | m_contents.append(t: contents); |
233 | } |
234 | |
235 | void Writer::startTransaction() |
236 | { |
237 | if (!m_db) |
238 | return; |
239 | |
240 | m_needOptimize = false; |
241 | if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions)) |
242 | m_db->transaction(); |
243 | } |
244 | |
245 | void Writer::endTransaction() |
246 | { |
247 | if (!m_db) |
248 | return; |
249 | |
250 | QSqlQuery query(*m_db); |
251 | |
252 | if (m_needOptimize) { |
253 | query.exec(query: QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')" )); |
254 | query.exec(query: QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')" )); |
255 | } |
256 | |
257 | if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions)) |
258 | m_db->commit(); |
259 | |
260 | if (m_needOptimize) |
261 | query.exec(query: QLatin1String("VACUUM" )); |
262 | } |
263 | |
264 | QHelpSearchIndexWriter::QHelpSearchIndexWriter() |
265 | : QThread() |
266 | , m_cancel(false) |
267 | { |
268 | } |
269 | |
270 | QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
271 | { |
272 | m_mutex.lock(); |
273 | this->m_cancel = true; |
274 | m_mutex.unlock(); |
275 | |
276 | wait(); |
277 | } |
278 | |
279 | void QHelpSearchIndexWriter::cancelIndexing() |
280 | { |
281 | QMutexLocker lock(&m_mutex); |
282 | m_cancel = true; |
283 | } |
284 | |
285 | void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
286 | const QString &indexFilesFolder, |
287 | bool reindex) |
288 | { |
289 | wait(); |
290 | QMutexLocker lock(&m_mutex); |
291 | |
292 | m_cancel = false; |
293 | m_reindex = reindex; |
294 | m_collectionFile = collectionFile; |
295 | m_indexFilesFolder = indexFilesFolder; |
296 | |
297 | lock.unlock(); |
298 | |
299 | start(QThread::LowestPriority); |
300 | } |
301 | |
// Custom-value key in the help collection under which the map of indexed
// namespaces is stored.
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
303 | |
304 | static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine) |
305 | { |
306 | QMap<QString, QDateTime> indexMap; |
307 | QDataStream dataStream(engine.customValue( |
308 | key: QLatin1String(IndexedNamespacesKey)).toByteArray()); |
309 | dataStream >> indexMap; |
310 | return indexMap; |
311 | } |
312 | |
313 | static bool writeIndexMap(QHelpEngineCore *engine, |
314 | const QMap<QString, QDateTime> &indexMap) |
315 | { |
316 | QByteArray data; |
317 | |
318 | QDataStream dataStream(&data, QIODevice::ReadWrite); |
319 | dataStream << indexMap; |
320 | |
321 | return engine->setCustomValue( |
322 | key: QLatin1String(IndexedNamespacesKey), value: data); |
323 | } |
324 | |
325 | static bool clearIndexMap(QHelpEngineCore *engine) |
326 | { |
327 | return engine->removeCustomValue(key: QLatin1String(IndexedNamespacesKey)); |
328 | } |
329 | |
330 | void QHelpSearchIndexWriter::run() |
331 | { |
332 | QMutexLocker lock(&m_mutex); |
333 | |
334 | if (m_cancel) |
335 | return; |
336 | |
337 | const bool reindex(m_reindex); |
338 | const QString collectionFile(m_collectionFile); |
339 | const QString indexPath(m_indexFilesFolder); |
340 | |
341 | lock.unlock(); |
342 | |
343 | QHelpEngineCore engine(collectionFile, nullptr); |
344 | if (!engine.setupData()) |
345 | return; |
346 | |
347 | if (reindex) |
348 | clearIndexMap(engine: &engine); |
349 | |
350 | emit indexingStarted(); |
351 | |
352 | Writer writer(indexPath); |
353 | |
354 | while (!writer.tryInit(reindex)) |
355 | sleep(1); |
356 | |
357 | const QStringList ®isteredDocs = engine.registeredDocumentations(); |
358 | QMap<QString, QDateTime> indexMap = readIndexMap(engine); |
359 | |
360 | if (!reindex) { |
361 | for (const QString &namespaceName : registeredDocs) { |
362 | if (indexMap.contains(key: namespaceName)) { |
363 | const QString path = engine.documentationFileName(namespaceName); |
364 | if (indexMap.value(key: namespaceName) < QFileInfo(path).lastModified()) { |
365 | // Remove some outdated indexed stuff |
366 | indexMap.remove(key: namespaceName); |
367 | writer.removeNamespace(namespaceName); |
368 | } else if (!writer.hasNamespace(namespaceName)) { |
369 | // No data in fts db for namespace. |
370 | // The namespace could have been removed from fts db |
371 | // or the whole fts db have been removed |
372 | // without removing it from indexMap. |
373 | indexMap.remove(key: namespaceName); |
374 | } |
375 | } else { |
376 | // Needed in case namespaceName was removed from indexMap |
377 | // without removing it from fts db. |
378 | // May happen when e.g. qch file was removed manually |
379 | // without removing fts db. |
380 | writer.removeNamespace(namespaceName); |
381 | } |
382 | // TODO: we may also detect if there are any other data |
383 | // and remove it |
384 | } |
385 | } else { |
386 | indexMap.clear(); |
387 | } |
388 | |
389 | for (const QString &namespaceName : indexMap.keys()) { |
390 | if (!registeredDocs.contains(str: namespaceName)) { |
391 | indexMap.remove(key: namespaceName); |
392 | writer.removeNamespace(namespaceName); |
393 | } |
394 | } |
395 | |
396 | for (const QString &namespaceName : registeredDocs) { |
397 | lock.relock(); |
398 | if (m_cancel) { |
399 | // store what we have done so far |
400 | writeIndexMap(engine: &engine, indexMap); |
401 | writer.endTransaction(); |
402 | emit indexingFinished(); |
403 | return; |
404 | } |
405 | lock.unlock(); |
406 | |
407 | // if indexed, continue |
408 | if (indexMap.contains(key: namespaceName)) |
409 | continue; |
410 | |
411 | const QString fileName = engine.documentationFileName(namespaceName); |
412 | QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName( |
413 | name: fileName, pointer: this), nullptr); |
414 | if (!reader.init()) |
415 | continue; |
416 | |
417 | const QString virtualFolder = reader.virtualFolder(); |
418 | |
419 | const QList<QStringList> &attributeSets = |
420 | engine.filterAttributeSets(namespaceName); |
421 | |
422 | for (const QStringList &attributes : attributeSets) { |
423 | const QString &attributesString = attributes.join(sep: QLatin1Char('|')); |
424 | |
425 | const QMultiMap<QString, QByteArray> htmlFiles = |
426 | reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("html" )); |
427 | const QMultiMap<QString, QByteArray> htmFiles = |
428 | reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("htm" )); |
429 | const QMultiMap<QString, QByteArray> txtFiles = |
430 | reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("txt" )); |
431 | |
432 | QMultiMap<QString, QByteArray> files = htmlFiles; |
433 | files.unite(other: htmFiles); |
434 | files.unite(other: txtFiles); |
435 | |
436 | for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) { |
437 | lock.relock(); |
438 | if (m_cancel) { |
439 | // store what we have done so far |
440 | writeIndexMap(engine: &engine, indexMap); |
441 | writer.endTransaction(); |
442 | emit indexingFinished(); |
443 | return; |
444 | } |
445 | lock.unlock(); |
446 | |
447 | const QString &file = it.key(); |
448 | const QByteArray &data = it.value(); |
449 | |
450 | if (data.isEmpty()) |
451 | continue; |
452 | |
453 | QUrl url; |
454 | url.setScheme(QLatin1String("qthelp" )); |
455 | url.setAuthority(authority: namespaceName); |
456 | url.setPath(path: QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file); |
457 | |
458 | if (url.hasFragment()) |
459 | url.setFragment(fragment: QString()); |
460 | |
461 | const QString &fullFileName = url.toString(); |
462 | if (!fullFileName.endsWith(s: QLatin1String(".html" )) |
463 | && !fullFileName.endsWith(s: QLatin1String(".htm" )) |
464 | && !fullFileName.endsWith(s: QLatin1String(".txt" ))) { |
465 | continue; |
466 | } |
467 | |
468 | QTextStream s(data); |
469 | auto encoding = QStringDecoder::encodingForHtml(data); |
470 | if (encoding) |
471 | s.setEncoding(*encoding); |
472 | |
473 | const QString &text = s.readAll(); |
474 | if (text.isEmpty()) |
475 | continue; |
476 | |
477 | QString title; |
478 | QString contents; |
479 | if (fullFileName.endsWith(s: QLatin1String(".txt" ))) { |
480 | title = fullFileName.mid(position: fullFileName.lastIndexOf(c: QLatin1Char('/')) + 1); |
481 | contents = text.toHtmlEscaped(); |
482 | } else { |
483 | QTextDocument doc; |
484 | doc.setHtml(text); |
485 | |
486 | title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped(); |
487 | contents = doc.toPlainText().toHtmlEscaped(); |
488 | } |
489 | |
490 | writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents); |
491 | } |
492 | } |
493 | writer.flush(); |
494 | const QString &path = engine.documentationFileName(namespaceName); |
495 | indexMap.insert(key: namespaceName, value: QFileInfo(path).lastModified()); |
496 | } |
497 | |
498 | writeIndexMap(engine: &engine, indexMap); |
499 | |
500 | writer.endTransaction(); |
501 | emit indexingFinished(); |
502 | } |
503 | |
} // namespace qt
505 | } // namespace fulltextsearch |
506 | |
507 | QT_END_NAMESPACE |
508 | |