1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qhelpsearchindexwriter_default_p.h"
5#include "qhelp_global.h"
6#include "qhelpenginecore.h"
7#include "qhelpdbreader_p.h"
8
9#include <QtCore/QDataStream>
10#include <QtCore/QDateTime>
11#include <QtCore/QDir>
12#include <QtCore/QStringDecoder>
13#include <QtCore/QTextStream>
14#include <QtCore/QSet>
15#include <QtCore/QUrl>
16#include <QtCore/QVariant>
17#include <QtSql/QSqlDatabase>
18#include <QtSql/QSqlDriver>
19#include <QtSql/QSqlError>
20#include <QtSql/QSqlQuery>
21
22#include <QTextDocument>
23
24QT_BEGIN_NAMESPACE
25
26namespace fulltextsearch {
27namespace qt {
28
// File name of the SQLite full text search database inside the index folder.
// Also used by Writer::clearLegacyIndex() to decide whether the folder
// already holds an index in the current format.
const char FTS_DB_NAME[] = "fts";
30
31Writer::Writer(const QString &path)
32 : m_dbDir(path)
33{
34 clearLegacyIndex();
35 QDir().mkpath(dirPath: m_dbDir);
36 m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: QLatin1String("QHelpWriter"), pointer: this);
37 m_db = new QSqlDatabase();
38 *m_db = QSqlDatabase::addDatabase(type: QLatin1String("QSQLITE"), connectionName: m_uniqueId);
39 const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME);
40 m_db->setDatabaseName(dbPath);
41 if (!m_db->open()) {
42 const QString &error = QHelpSearchIndexWriter::tr(s: "Cannot open database \"%1\" using connection \"%2\": %3")
43 .arg(args: dbPath, args&: m_uniqueId, args: m_db->lastError().text());
44 qWarning(msg: "%s", qUtf8Printable(error));
45 delete m_db;
46 m_db = nullptr;
47 QSqlDatabase::removeDatabase(connectionName: m_uniqueId);
48 m_uniqueId = QString();
49 } else {
50 startTransaction();
51 }
52}
53
54bool Writer::tryInit(bool reindex)
55{
56 if (!m_db)
57 return true;
58
59 QSqlQuery query(*m_db);
60 // HACK: we try to perform any modifying command just to check if
61 // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver)
62 if (!query.exec(query: QLatin1String("CREATE TABLE foo ();"))) {
63 if (query.lastError().nativeErrorCode() == QLatin1String("5")) // db is locked
64 return false;
65 }
66 // HACK: clear what we have created
67 query.exec(query: QLatin1String("DROP TABLE foo;"));
68
69 init(reindex);
70 return true;
71}
72
73bool Writer::hasDB()
74{
75 if (!m_db)
76 return false;
77
78 QSqlQuery query(*m_db);
79
80 query.prepare(query: QLatin1String("SELECT id FROM info LIMIT 1"));
81 query.exec();
82
83 return query.next();
84}
85
86void Writer::clearLegacyIndex()
87{
88 // Clear old legacy clucene index.
89 // More important in case of Creator, since
90 // the index folder is common for all Creator versions
91 QDir dir(m_dbDir);
92 if (!dir.exists())
93 return;
94
95 const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden);
96 if (!list.contains(str: QLatin1String(FTS_DB_NAME))) {
97 for (const QString &item : list)
98 dir.remove(fileName: item);
99 }
100}
101
102void Writer::init(bool reindex)
103{
104 if (!m_db)
105 return;
106
107 QSqlQuery query(*m_db);
108
109 if (reindex && hasDB()) {
110 m_needOptimize = true;
111
112 query.exec(query: QLatin1String("DROP TABLE titles;"));
113 query.exec(query: QLatin1String("DROP TABLE contents;"));
114 query.exec(query: QLatin1String("DROP TABLE info;"));
115 }
116
117 query.exec(query: QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"));
118
119 query.exec(query: QLatin1String("CREATE VIRTUAL TABLE titles USING fts5("
120 "namespace UNINDEXED, attributes UNINDEXED, "
121 "url UNINDEXED, title, "
122 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
123 query.exec(query: QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN "
124 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
125 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
126 "END;"));
127 query.exec(query: QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN "
128 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
129 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
130 "END;"));
131 query.exec(query: QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN "
132 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
133 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
134 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
135 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
136 "END;"));
137
138 query.exec(query: QLatin1String("CREATE VIRTUAL TABLE contents USING fts5("
139 "namespace UNINDEXED, attributes UNINDEXED, "
140 "url UNINDEXED, title, data, "
141 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
142 query.exec(query: QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN "
143 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
144 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
145 "END;"));
146 query.exec(query: QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN "
147 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
148 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
149 "END;"));
150 query.exec(query: QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN "
151 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
152 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
153 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
154 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
155 "END;"));
156}
157
158Writer::~Writer()
159{
160 if (m_db) {
161 m_db->close();
162 delete m_db;
163 }
164
165 if (!m_uniqueId.isEmpty())
166 QSqlDatabase::removeDatabase(connectionName: m_uniqueId);
167}
168
169void Writer::flush()
170{
171 if (!m_db)
172 return;
173
174 QSqlQuery query(*m_db);
175
176 query.prepare(query: QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"));
177 query.addBindValue(val: m_namespaces);
178 query.addBindValue(val: m_attributes);
179 query.addBindValue(val: m_urls);
180 query.addBindValue(val: m_titles);
181 query.addBindValue(val: m_contents);
182 query.execBatch();
183
184 m_namespaces = QVariantList();
185 m_attributes = QVariantList();
186 m_urls = QVariantList();
187 m_titles = QVariantList();
188 m_contents = QVariantList();
189}
190
191void Writer::removeNamespace(const QString &namespaceName)
192{
193 if (!m_db)
194 return;
195
196 if (!hasNamespace(namespaceName))
197 return; // no data to delete
198
199 m_needOptimize = true;
200
201 QSqlQuery query(*m_db);
202
203 query.prepare(query: QLatin1String("DELETE FROM info WHERE namespace = ?"));
204 query.addBindValue(val: namespaceName);
205 query.exec();
206}
207
208bool Writer::hasNamespace(const QString &namespaceName)
209{
210 if (!m_db)
211 return false;
212
213 QSqlQuery query(*m_db);
214
215 query.prepare(query: QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1"));
216 query.addBindValue(val: namespaceName);
217 query.exec();
218
219 return query.next();
220}
221
222void Writer::insertDoc(const QString &namespaceName,
223 const QString &attributes,
224 const QString &url,
225 const QString &title,
226 const QString &contents)
227{
228 m_namespaces.append(t: namespaceName);
229 m_attributes.append(t: attributes);
230 m_urls.append(t: url);
231 m_titles.append(t: title);
232 m_contents.append(t: contents);
233}
234
235void Writer::startTransaction()
236{
237 if (!m_db)
238 return;
239
240 m_needOptimize = false;
241 if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions))
242 m_db->transaction();
243}
244
245void Writer::endTransaction()
246{
247 if (!m_db)
248 return;
249
250 QSqlQuery query(*m_db);
251
252 if (m_needOptimize) {
253 query.exec(query: QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')"));
254 query.exec(query: QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')"));
255 }
256
257 if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions))
258 m_db->commit();
259
260 if (m_needOptimize)
261 query.exec(query: QLatin1String("VACUUM"));
262}
263
// Constructs the indexing thread object with the cancel flag cleared.
// The thread itself is not started here; updateIndex() does that.
QHelpSearchIndexWriter::QHelpSearchIndexWriter()
    : QThread()
    , m_cancel(false)
{
}
269
270QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
271{
272 m_mutex.lock();
273 this->m_cancel = true;
274 m_mutex.unlock();
275
276 wait();
277}
278
279void QHelpSearchIndexWriter::cancelIndexing()
280{
281 QMutexLocker lock(&m_mutex);
282 m_cancel = true;
283}
284
285void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
286 const QString &indexFilesFolder,
287 bool reindex)
288{
289 wait();
290 QMutexLocker lock(&m_mutex);
291
292 m_cancel = false;
293 m_reindex = reindex;
294 m_collectionFile = collectionFile;
295 m_indexFilesFolder = indexFilesFolder;
296
297 lock.unlock();
298
299 start(QThread::LowestPriority);
300}
301
// Key of the help engine custom value that stores the serialized map of
// indexed namespaces (namespace name -> timestamp recorded at indexing).
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
303
304static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine)
305{
306 QMap<QString, QDateTime> indexMap;
307 QDataStream dataStream(engine.customValue(
308 key: QLatin1String(IndexedNamespacesKey)).toByteArray());
309 dataStream >> indexMap;
310 return indexMap;
311}
312
313static bool writeIndexMap(QHelpEngineCore *engine,
314 const QMap<QString, QDateTime> &indexMap)
315{
316 QByteArray data;
317
318 QDataStream dataStream(&data, QIODevice::ReadWrite);
319 dataStream << indexMap;
320
321 return engine->setCustomValue(
322 key: QLatin1String(IndexedNamespacesKey), value: data);
323}
324
325static bool clearIndexMap(QHelpEngineCore *engine)
326{
327 return engine->removeCustomValue(key: QLatin1String(IndexedNamespacesKey));
328}
329
330void QHelpSearchIndexWriter::run()
331{
332 QMutexLocker lock(&m_mutex);
333
334 if (m_cancel)
335 return;
336
337 const bool reindex(m_reindex);
338 const QString collectionFile(m_collectionFile);
339 const QString indexPath(m_indexFilesFolder);
340
341 lock.unlock();
342
343 QHelpEngineCore engine(collectionFile, nullptr);
344 if (!engine.setupData())
345 return;
346
347 if (reindex)
348 clearIndexMap(engine: &engine);
349
350 emit indexingStarted();
351
352 Writer writer(indexPath);
353
354 while (!writer.tryInit(reindex))
355 sleep(1);
356
357 const QStringList &registeredDocs = engine.registeredDocumentations();
358 QMap<QString, QDateTime> indexMap = readIndexMap(engine);
359
360 if (!reindex) {
361 for (const QString &namespaceName : registeredDocs) {
362 if (indexMap.contains(key: namespaceName)) {
363 const QString path = engine.documentationFileName(namespaceName);
364 if (indexMap.value(key: namespaceName) < QFileInfo(path).lastModified()) {
365 // Remove some outdated indexed stuff
366 indexMap.remove(key: namespaceName);
367 writer.removeNamespace(namespaceName);
368 } else if (!writer.hasNamespace(namespaceName)) {
369 // No data in fts db for namespace.
370 // The namespace could have been removed from fts db
371 // or the whole fts db have been removed
372 // without removing it from indexMap.
373 indexMap.remove(key: namespaceName);
374 }
375 } else {
376 // Needed in case namespaceName was removed from indexMap
377 // without removing it from fts db.
378 // May happen when e.g. qch file was removed manually
379 // without removing fts db.
380 writer.removeNamespace(namespaceName);
381 }
382 // TODO: we may also detect if there are any other data
383 // and remove it
384 }
385 } else {
386 indexMap.clear();
387 }
388
389 for (const QString &namespaceName : indexMap.keys()) {
390 if (!registeredDocs.contains(str: namespaceName)) {
391 indexMap.remove(key: namespaceName);
392 writer.removeNamespace(namespaceName);
393 }
394 }
395
396 for (const QString &namespaceName : registeredDocs) {
397 lock.relock();
398 if (m_cancel) {
399 // store what we have done so far
400 writeIndexMap(engine: &engine, indexMap);
401 writer.endTransaction();
402 emit indexingFinished();
403 return;
404 }
405 lock.unlock();
406
407 // if indexed, continue
408 if (indexMap.contains(key: namespaceName))
409 continue;
410
411 const QString fileName = engine.documentationFileName(namespaceName);
412 QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName(
413 name: fileName, pointer: this), nullptr);
414 if (!reader.init())
415 continue;
416
417 const QString virtualFolder = reader.virtualFolder();
418
419 const QList<QStringList> &attributeSets =
420 engine.filterAttributeSets(namespaceName);
421
422 for (const QStringList &attributes : attributeSets) {
423 const QString &attributesString = attributes.join(sep: QLatin1Char('|'));
424
425 const QMultiMap<QString, QByteArray> htmlFiles =
426 reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("html"));
427 const QMultiMap<QString, QByteArray> htmFiles =
428 reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("htm"));
429 const QMultiMap<QString, QByteArray> txtFiles =
430 reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("txt"));
431
432 QMultiMap<QString, QByteArray> files = htmlFiles;
433 files.unite(other: htmFiles);
434 files.unite(other: txtFiles);
435
436 for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) {
437 lock.relock();
438 if (m_cancel) {
439 // store what we have done so far
440 writeIndexMap(engine: &engine, indexMap);
441 writer.endTransaction();
442 emit indexingFinished();
443 return;
444 }
445 lock.unlock();
446
447 const QString &file = it.key();
448 const QByteArray &data = it.value();
449
450 if (data.isEmpty())
451 continue;
452
453 QUrl url;
454 url.setScheme(QLatin1String("qthelp"));
455 url.setAuthority(authority: namespaceName);
456 url.setPath(path: QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file);
457
458 if (url.hasFragment())
459 url.setFragment(fragment: QString());
460
461 const QString &fullFileName = url.toString();
462 if (!fullFileName.endsWith(s: QLatin1String(".html"))
463 && !fullFileName.endsWith(s: QLatin1String(".htm"))
464 && !fullFileName.endsWith(s: QLatin1String(".txt"))) {
465 continue;
466 }
467
468 QTextStream s(data);
469 auto encoding = QStringDecoder::encodingForHtml(data);
470 if (encoding)
471 s.setEncoding(*encoding);
472
473 const QString &text = s.readAll();
474 if (text.isEmpty())
475 continue;
476
477 QString title;
478 QString contents;
479 if (fullFileName.endsWith(s: QLatin1String(".txt"))) {
480 title = fullFileName.mid(position: fullFileName.lastIndexOf(c: QLatin1Char('/')) + 1);
481 contents = text.toHtmlEscaped();
482 } else {
483 QTextDocument doc;
484 doc.setHtml(text);
485
486 title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped();
487 contents = doc.toPlainText().toHtmlEscaped();
488 }
489
490 writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents);
491 }
492 }
493 writer.flush();
494 const QString &path = engine.documentationFileName(namespaceName);
495 indexMap.insert(key: namespaceName, value: QFileInfo(path).lastModified());
496 }
497
498 writeIndexMap(engine: &engine, indexMap);
499
500 writer.endTransaction();
501 emit indexingFinished();
502}
503
} // namespace qt
505} // namespace fulltextsearch
506
507QT_END_NAMESPACE
508

source code of qttools/src/assistant/help/qhelpsearchindexwriter_default.cpp