| 1 | /** |
| 2 | * parsetrigrams.cpp |
| 3 | * |
| 4 | * Parse a set of trigram files into a QMap, and serialize to stdout. |
| 5 | * Note: we allow this data to be read into QHash. We use QMap here |
| 6 | * to get deterministic output from run to run. |
| 7 | * |
| 8 | * SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net> |
| 9 | * |
| 10 | * SPDX-License-Identifier: LGPL-2.1-or-later |
| 11 | */ |
| 12 | |
| 13 | #include <QDataStream> |
| 14 | #include <QDir> |
| 15 | #include <QFile> |
| 16 | #include <QMap> |
| 17 | #include <QRegularExpression> |
| 18 | #include <QString> |
| 19 | #include <QTextStream> |
| 20 | |
| 21 | int main(int argc, char **argv) |
| 22 | { |
| 23 | if (argc < 2) { |
| 24 | return 1; |
| 25 | } |
| 26 | |
| 27 | QFile sout; |
| 28 | sout.open(stdout, ioFlags: QIODevice::WriteOnly); |
| 29 | QDataStream out(&sout); |
| 30 | |
| 31 | QString path = QLatin1String(argv[1]); |
| 32 | QDir td(path); |
| 33 | |
| 34 | /* |
| 35 | * We use QMap (instead of QHash) here to get deterministic output |
| 36 | * from run to run. |
| 37 | */ |
| 38 | QMap<QString, QMap<QString, int>> models; |
| 39 | |
| 40 | const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)" )); |
| 41 | const QStringList files = td.entryList(filters: QDir::Files); |
| 42 | for (const QString &fname : files) { |
| 43 | QFile fin(td.filePath(fileName: fname)); |
| 44 | fin.open(flags: QFile::ReadOnly | QFile::Text); |
| 45 | QTextStream stream(&fin); |
| 46 | |
| 47 | while (!stream.atEnd()) { |
| 48 | QString line = stream.readLine(); |
| 49 | const QRegularExpressionMatch match = rx.match(subject: line); |
| 50 | if (match.hasMatch()) { |
| 51 | models[fname][line.left(n: 3)] = match.capturedView(nth: 1).toInt(); |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | out << models; |
| 57 | } |
| 58 | |