1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the Qt Assistant of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qhelpsearchindexwriter_default_p.h"
41#include "qhelp_global.h"
42#include "qhelpenginecore.h"
43#include "qhelpdbreader_p.h"
44
45#include <QtCore/QDataStream>
46#include <QtCore/QDateTime>
47#include <QtCore/QDir>
48#include <QtCore/QTextCodec>
49#include <QtCore/QTextStream>
50#include <QtCore/QSet>
51#include <QtCore/QUrl>
52#include <QtCore/QVariant>
53#include <QtSql/QSqlDatabase>
54#include <QtSql/QSqlDriver>
55#include <QtSql/QSqlError>
56#include <QtSql/QSqlQuery>
57
58#include <QTextDocument>
59
60QT_BEGIN_NAMESPACE
61
62namespace fulltextsearch {
63namespace qt {
64
// File name of the SQLite database that holds the full text search index.
// The Writer creates it inside the index directory it is given, and its
// presence is what tells a current index apart from a legacy one.
65const char FTS_DB_NAME[] = "fts";
66
67Writer::Writer(const QString &path)
68 : m_dbDir(path)
69{
70 clearLegacyIndex();
71 QDir().mkpath(dirPath: m_dbDir);
72 m_uniqueId = QHelpGlobal::uniquifyConnectionName(name: QLatin1String("QHelpWriter"), pointer: this);
73 m_db = new QSqlDatabase();
74 *m_db = QSqlDatabase::addDatabase(type: QLatin1String("QSQLITE"), connectionName: m_uniqueId);
75 const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME);
76 m_db->setDatabaseName(dbPath);
77 if (!m_db->open()) {
78 const QString &error = QHelpSearchIndexWriter::tr(s: "Cannot open database \"%1\" using connection \"%2\": %3")
79 .arg(args: dbPath, args&: m_uniqueId, args: m_db->lastError().text());
80 qWarning(msg: "%s", qUtf8Printable(error));
81 delete m_db;
82 m_db = nullptr;
83 QSqlDatabase::removeDatabase(connectionName: m_uniqueId);
84 m_uniqueId = QString();
85 } else {
86 startTransaction();
87 }
88}
89
90bool Writer::tryInit(bool reindex)
91{
92 if (!m_db)
93 return true;
94
95 QSqlQuery query(*m_db);
96 // HACK: we try to perform any modifying command just to check if
97 // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver)
98 if (!query.exec(query: QLatin1String("CREATE TABLE foo ();"))) {
99 if (query.lastError().nativeErrorCode() == QLatin1String("5")) // db is locked
100 return false;
101 }
102 // HACK: clear what we have created
103 query.exec(query: QLatin1String("DROP TABLE foo;"));
104
105 init(reindex);
106 return true;
107}
108
109bool Writer::hasDB()
110{
111 if (!m_db)
112 return false;
113
114 QSqlQuery query(*m_db);
115
116 query.prepare(query: QLatin1String("SELECT id FROM info LIMIT 1"));
117 query.exec();
118
119 return query.next();
120}
121
122void Writer::clearLegacyIndex()
123{
124 // Clear old legacy clucene index.
125 // More important in case of Creator, since
126 // the index folder is common for all Creator versions
127 QDir dir(m_dbDir);
128 if (!dir.exists())
129 return;
130
131 const QStringList &list = dir.entryList(filters: QDir::Files | QDir::Hidden);
132 if (!list.contains(str: QLatin1String(FTS_DB_NAME))) {
133 for (const QString &item : list)
134 dir.remove(fileName: item);
135 }
136}
137
138void Writer::init(bool reindex)
139{
140 if (!m_db)
141 return;
142
143 QSqlQuery query(*m_db);
144
145 if (reindex && hasDB()) {
146 m_needOptimize = true;
147
148 query.exec(query: QLatin1String("DROP TABLE titles;"));
149 query.exec(query: QLatin1String("DROP TABLE contents;"));
150 query.exec(query: QLatin1String("DROP TABLE info;"));
151 }
152
153 query.exec(query: QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"));
154
155 query.exec(query: QLatin1String("CREATE VIRTUAL TABLE titles USING fts5("
156 "namespace UNINDEXED, attributes UNINDEXED, "
157 "url UNINDEXED, title, "
158 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
159 query.exec(query: QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN "
160 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
161 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
162 "END;"));
163 query.exec(query: QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN "
164 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
165 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
166 "END;"));
167 query.exec(query: QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN "
168 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
169 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
170 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
171 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
172 "END;"));
173
174 query.exec(query: QLatin1String("CREATE VIRTUAL TABLE contents USING fts5("
175 "namespace UNINDEXED, attributes UNINDEXED, "
176 "url UNINDEXED, title, data, "
177 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
178 query.exec(query: QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN "
179 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
180 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
181 "END;"));
182 query.exec(query: QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN "
183 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
184 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
185 "END;"));
186 query.exec(query: QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN "
187 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
188 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
189 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
190 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
191 "END;"));
192}
193
194Writer::~Writer()
195{
196 if (m_db) {
197 m_db->close();
198 delete m_db;
199 }
200
201 if (!m_uniqueId.isEmpty())
202 QSqlDatabase::removeDatabase(connectionName: m_uniqueId);
203}
204
205void Writer::flush()
206{
207 if (!m_db)
208 return;
209
210 QSqlQuery query(*m_db);
211
212 query.prepare(query: QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"));
213 query.addBindValue(val: m_namespaces);
214 query.addBindValue(val: m_attributes);
215 query.addBindValue(val: m_urls);
216 query.addBindValue(val: m_titles);
217 query.addBindValue(val: m_contents);
218 query.execBatch();
219
220 m_namespaces = QVariantList();
221 m_attributes = QVariantList();
222 m_urls = QVariantList();
223 m_titles = QVariantList();
224 m_contents = QVariantList();
225}
226
227void Writer::removeNamespace(const QString &namespaceName)
228{
229 if (!m_db)
230 return;
231
232 if (!hasNamespace(namespaceName))
233 return; // no data to delete
234
235 m_needOptimize = true;
236
237 QSqlQuery query(*m_db);
238
239 query.prepare(query: QLatin1String("DELETE FROM info WHERE namespace = ?"));
240 query.addBindValue(val: namespaceName);
241 query.exec();
242}
243
244bool Writer::hasNamespace(const QString &namespaceName)
245{
246 if (!m_db)
247 return false;
248
249 QSqlQuery query(*m_db);
250
251 query.prepare(query: QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1"));
252 query.addBindValue(val: namespaceName);
253 query.exec();
254
255 return query.next();
256}
257
258void Writer::insertDoc(const QString &namespaceName,
259 const QString &attributes,
260 const QString &url,
261 const QString &title,
262 const QString &contents)
263{
264 m_namespaces.append(t: namespaceName);
265 m_attributes.append(t: attributes);
266 m_urls.append(t: url);
267 m_titles.append(t: title);
268 m_contents.append(t: contents);
269}
270
271void Writer::startTransaction()
272{
273 if (!m_db)
274 return;
275
276 m_needOptimize = false;
277 if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions))
278 m_db->transaction();
279}
280
281void Writer::endTransaction()
282{
283 if (!m_db)
284 return;
285
286 QSqlQuery query(*m_db);
287
288 if (m_needOptimize) {
289 query.exec(query: QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')"));
290 query.exec(query: QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')"));
291 }
292
293 if (m_db && m_db->driver()->hasFeature(f: QSqlDriver::Transactions))
294 m_db->commit();
295
296 if (m_needOptimize)
297 query.exec(query: QLatin1String("VACUUM"));
298}
299
300QHelpSearchIndexWriter::QHelpSearchIndexWriter()
301 : QThread()
302 , m_cancel(false)
303{
304}
305
306QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
307{
308 m_mutex.lock();
309 this->m_cancel = true;
310 m_mutex.unlock();
311
312 wait();
313}
314
315void QHelpSearchIndexWriter::cancelIndexing()
316{
317 QMutexLocker lock(&m_mutex);
318 m_cancel = true;
319}
320
321void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
322 const QString &indexFilesFolder,
323 bool reindex)
324{
325 wait();
326 QMutexLocker lock(&m_mutex);
327
328 m_cancel = false;
329 m_reindex = reindex;
330 m_collectionFile = collectionFile;
331 m_indexFilesFolder = indexFilesFolder;
332
333 lock.unlock();
334
335 start(QThread::LowestPriority);
336}
337
// Key of the custom value in the help collection under which the serialized
// map of indexed namespaces (namespace name -> timestamp) is kept.
338static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
339
340static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine)
341{
342 QMap<QString, QDateTime> indexMap;
343 QDataStream dataStream(engine.customValue(
344 key: QLatin1String(IndexedNamespacesKey)).toByteArray());
345 dataStream >> indexMap;
346 return indexMap;
347}
348
349static bool writeIndexMap(QHelpEngineCore *engine,
350 const QMap<QString, QDateTime> &indexMap)
351{
352 QByteArray data;
353
354 QDataStream dataStream(&data, QIODevice::ReadWrite);
355 dataStream << indexMap;
356
357 return engine->setCustomValue(
358 key: QLatin1String(IndexedNamespacesKey), value: data);
359}
360
361static bool clearIndexMap(QHelpEngineCore *engine)
362{
363 return engine->removeCustomValue(key: QLatin1String(IndexedNamespacesKey));
364}
365
366void QHelpSearchIndexWriter::run()
367{
368 QMutexLocker lock(&m_mutex);
369
370 if (m_cancel)
371 return;
372
373 const bool reindex(m_reindex);
374 const QString collectionFile(m_collectionFile);
375 const QString indexPath(m_indexFilesFolder);
376
377 lock.unlock();
378
379 QHelpEngineCore engine(collectionFile, nullptr);
380 if (!engine.setupData())
381 return;
382
383 if (reindex)
384 clearIndexMap(engine: &engine);
385
386 emit indexingStarted();
387
388 Writer writer(indexPath);
389
390 while (!writer.tryInit(reindex))
391 sleep(1);
392
393 const QStringList &registeredDocs = engine.registeredDocumentations();
394 QMap<QString, QDateTime> indexMap = readIndexMap(engine);
395
396 if (!reindex) {
397 for (const QString &namespaceName : registeredDocs) {
398 if (indexMap.contains(akey: namespaceName)) {
399 const QString path = engine.documentationFileName(namespaceName);
400 if (indexMap.value(akey: namespaceName) < QFileInfo(path).lastModified()) {
401 // Remove some outdated indexed stuff
402 indexMap.remove(akey: namespaceName);
403 writer.removeNamespace(namespaceName);
404 } else if (!writer.hasNamespace(namespaceName)) {
405 // No data in fts db for namespace.
406 // The namespace could have been removed from fts db
407 // or the whole fts db have been removed
408 // without removing it from indexMap.
409 indexMap.remove(akey: namespaceName);
410 }
411 } else {
412 // Needed in case namespaceName was removed from indexMap
413 // without removing it from fts db.
414 // May happen when e.g. qch file was removed manually
415 // without removing fts db.
416 writer.removeNamespace(namespaceName);
417 }
418 // TODO: we may also detect if there are any other data
419 // and remove it
420 }
421 } else {
422 indexMap.clear();
423 }
424
425 for (const QString &namespaceName : indexMap.keys()) {
426 if (!registeredDocs.contains(str: namespaceName)) {
427 indexMap.remove(akey: namespaceName);
428 writer.removeNamespace(namespaceName);
429 }
430 }
431
432 for (const QString &namespaceName : registeredDocs) {
433 lock.relock();
434 if (m_cancel) {
435 // store what we have done so far
436 writeIndexMap(engine: &engine, indexMap);
437 writer.endTransaction();
438 emit indexingFinished();
439 return;
440 }
441 lock.unlock();
442
443 // if indexed, continue
444 if (indexMap.contains(akey: namespaceName))
445 continue;
446
447 const QString fileName = engine.documentationFileName(namespaceName);
448 QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName(
449 name: fileName, pointer: this), nullptr);
450 if (!reader.init())
451 continue;
452
453 const QString virtualFolder = reader.virtualFolder();
454
455 const QList<QStringList> &attributeSets =
456 engine.filterAttributeSets(namespaceName);
457
458 for (const QStringList &attributes : attributeSets) {
459 const QString &attributesString = attributes.join(sep: QLatin1Char('|'));
460
461 const QMap<QString, QByteArray> htmlFiles
462 = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("html"));
463 const QMap<QString, QByteArray> htmFiles
464 = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("htm"));
465 const QMap<QString, QByteArray> txtFiles
466 = reader.filesData(filterAttributes: attributes, extensionFilter: QLatin1String("txt"));
467
468 QMultiMap<QString, QByteArray> files = htmlFiles;
469 files.unite(other: htmFiles);
470 files.unite(other: txtFiles);
471
472 for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) {
473 lock.relock();
474 if (m_cancel) {
475 // store what we have done so far
476 writeIndexMap(engine: &engine, indexMap);
477 writer.endTransaction();
478 emit indexingFinished();
479 return;
480 }
481 lock.unlock();
482
483 const QString &file = it.key();
484 const QByteArray &data = it.value();
485
486 if (data.isEmpty())
487 continue;
488
489 QUrl url;
490 url.setScheme(QLatin1String("qthelp"));
491 url.setAuthority(authority: namespaceName);
492 url.setPath(path: QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file);
493
494 if (url.hasFragment())
495 url.setFragment(fragment: QString());
496
497 const QString &fullFileName = url.toString();
498 if (!fullFileName.endsWith(s: QLatin1String(".html"))
499 && !fullFileName.endsWith(s: QLatin1String(".htm"))
500 && !fullFileName.endsWith(s: QLatin1String(".txt"))) {
501 continue;
502 }
503
504 QTextStream s(data);
505 const QString &en = QHelpGlobal::codecFromData(data);
506 s.setCodec(QTextCodec::codecForName(name: en.toLatin1().constData()));
507
508 const QString &text = s.readAll();
509 if (text.isEmpty())
510 continue;
511
512 QString title;
513 QString contents;
514 if (fullFileName.endsWith(s: QLatin1String(".txt"))) {
515 title = fullFileName.mid(position: fullFileName.lastIndexOf(c: QLatin1Char('/')) + 1);
516 contents = text.toHtmlEscaped();
517 } else {
518 QTextDocument doc;
519 doc.setHtml(text);
520
521 title = doc.metaInformation(info: QTextDocument::DocumentTitle).toHtmlEscaped();
522 contents = doc.toPlainText().toHtmlEscaped();
523 }
524
525 writer.insertDoc(namespaceName, attributes: attributesString, url: fullFileName, title, contents);
526 }
527 }
528 writer.flush();
529 const QString &path = engine.documentationFileName(namespaceName);
530 indexMap.insert(akey: namespaceName, avalue: QFileInfo(path).lastModified());
531 }
532
533 writeIndexMap(engine: &engine, indexMap);
534
535 writer.endTransaction();
536 emit indexingFinished();
537}
538
} // namespace qt
540} // namespace fulltextsearch
541
542QT_END_NAMESPACE
543

source code of qttools/src/assistant/help/qhelpsearchindexwriter_default.cpp