1/**
2 * parsetrigrams.cpp
3 *
4 * Parse a set of trigram files into a QMap, and serialize to stdout.
5 * Note: we allow this data to be read into QHash. We use QMap here
6 * to get deterministic output from run to run.
7 *
8 * SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net>
9 *
10 * SPDX-License-Identifier: LGPL-2.1-or-later
11 */
12
13#include <QDataStream>
14#include <QDir>
15#include <QFile>
16#include <QMap>
17#include <QRegularExpression>
18#include <QString>
19#include <QTextStream>
20
21int main(int argc, char **argv)
22{
23 if (argc < 2) {
24 return 1;
25 }
26
27 QFile sout;
28 sout.open(stdout, QIODevice::WriteOnly);
29 QDataStream out(&sout);
30
31 QString path = QLatin1String(argv[1]);
32 QDir td(path);
33
34 /*
35 * We use QMap (instead of QHash) here to get deterministic output
36 * from run to run.
37 */
38 QMap<QString, QMap<QString, int>> models;
39
40 const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)"));
41 const QStringList files = td.entryList(QDir::Files);
42 for (const QString &fname : files) {
43 QFile fin(td.filePath(fname));
44 fin.open(QFile::ReadOnly | QFile::Text);
45 QTextStream stream(&fin);
46
47 while (!stream.atEnd()) {
48 QString line = stream.readLine();
49 const QRegularExpressionMatch match = rx.match(line);
50 if (match.hasMatch()) {
51 models[fname][line.left(3)] = match.capturedView(1).toInt();
52 }
53 }
54 }
55
56 out << models;
57}
58

source code of sonnet/data/parsetrigrams.cpp