Warning: That file was not part of the compilation database. It may have many parsing errors.
1 | /**************************************************************************** |
---|---|
2 | ** Copyright (C) 2017 Ford Motor Company. |
3 | ** All rights reserved. |
4 | ** |
5 | ** Copyright (C) 2017 The Qt Company Ltd. |
6 | ** Contact: https://www.qt.io/licensing/ |
7 | ** |
8 | ** This file is part of the QtRemoteObjects module of the Qt Toolkit. |
9 | ** |
10 | ** $QT_BEGIN_LICENSE:LGPL$ |
11 | ** Commercial License Usage |
12 | ** Licensees holding valid commercial Qt licenses may use this file in |
13 | ** accordance with the commercial license agreement provided with the |
14 | ** Software or, alternatively, in accordance with the terms contained in |
15 | ** a written agreement between you and The Qt Company. For licensing terms |
16 | ** and conditions see https://www.qt.io/terms-conditions. For further |
17 | ** information use the contact form at https://www.qt.io/contact-us. |
18 | ** |
19 | ** GNU Lesser General Public License Usage |
20 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
21 | ** General Public License version 3 as published by the Free Software |
22 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
23 | ** packaging of this file. Please review the following information to |
24 | ** ensure the GNU Lesser General Public License version 3 requirements |
25 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
26 | ** |
27 | ** GNU General Public License Usage |
28 | ** Alternatively, this file may be used under the terms of the GNU |
29 | ** General Public License version 2.0 or (at your option) the GNU General |
30 | ** Public license version 3 or any later version approved by the KDE Free |
31 | ** Qt Foundation. The licenses are as published by the Free Software |
32 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
33 | ** included in the packaging of this file. Please review the following |
34 | ** information to ensure the GNU General Public License requirements will |
35 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
36 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
37 | ** |
38 | ** $QT_END_LICENSE$ |
39 | ** |
40 | ****************************************************************************/ |
41 | |
42 | #ifndef QREGEXPARSER_H |
43 | #define QREGEXPARSER_H |
44 | |
45 | #include <QtCore/qshareddata.h> |
46 | #include <QtCore/qvarlengtharray.h> |
47 | #include <QtCore/qvariant.h> |
48 | #ifdef QT_BOOTSTRAPPED |
49 | # include <QtCore/qregexp.h> |
50 | # define REGEX QRegExp |
51 | #else |
52 | # include <QtCore/qregularexpression.h> |
53 | # define REGEX QRegularExpression |
54 | #endif |
55 | #include <QtCore/qmap.h> |
56 | #include <QtCore/qfile.h> |
57 | #include <QtCore/qtextstream.h> |
58 | #include <QtCore/qdebug.h> |
59 | |
60 | struct MatchCandidate { |
61 | MatchCandidate(const QString &n, const QString &t, int i) : name(n), matchText(t), index(i) {} |
62 | QString name; |
63 | QString matchText; |
64 | int index; |
65 | }; |
66 | |
67 | QT_BEGIN_NAMESPACE |
68 | |
69 | template <typename _Parser, typename _Table> |
70 | class QRegexParser: protected _Table |
71 | { |
72 | public: |
73 | QRegexParser(int maxMatchLen=4096); |
74 | virtual ~QRegexParser(); |
75 | |
76 | virtual bool parse(); |
77 | |
78 | virtual void reset() {} |
79 | |
80 | inline QVariant &sym(int index); |
81 | |
82 | void setBuffer(const QString &buffer); |
83 | |
84 | void setBufferFromDevice(QIODevice *device); |
85 | |
86 | void setDebug(); |
87 | |
88 | QString errorString() const |
89 | { |
90 | return m_errorString; |
91 | } |
92 | |
93 | void setErrorString(const QString &error) |
94 | { |
95 | m_errorString = error; |
96 | qWarning() << m_errorString; |
97 | } |
98 | |
99 | inline const QMap<QString, QString>& captured() const |
100 | { |
101 | return m_captured; |
102 | } |
103 | |
104 | inline bool isDebug() const |
105 | { |
106 | return m_debug; |
107 | } |
108 | |
109 | inline int lineNumber() const |
110 | { |
111 | return m_lineno; |
112 | } |
113 | |
114 | private: |
115 | int nextToken(); |
116 | |
117 | inline bool consumeRule(int rule) |
118 | { |
119 | return static_cast<_Parser*> (this)->consumeRule(rule); |
120 | } |
121 | |
122 | enum { DefaultStackSize = 128 }; |
123 | |
124 | struct Data: public QSharedData |
125 | { |
126 | Data(): stackSize (DefaultStackSize), tos (0) {} |
127 | |
128 | QVarLengthArray<int, DefaultStackSize> stateStack; |
129 | QVarLengthArray<QVariant, DefaultStackSize> parseStack; |
130 | int stackSize; |
131 | int tos; |
132 | |
133 | void reallocateStack() { |
134 | stackSize <<= 1; |
135 | stateStack.resize(stackSize); |
136 | parseStack.resize(stackSize); |
137 | } |
138 | }; |
139 | |
140 | inline QString escapeString(QString s) |
141 | { |
142 | return s.replace(c: QLatin1Char('\n'), after: QLatin1String("\\n")).replace(c: QLatin1Char('\t'), after: QLatin1String( "\\t")); |
143 | } |
144 | |
145 | QSharedDataPointer<Data> d; |
146 | |
147 | QList<REGEX> m_regexes; |
148 | #ifndef QT_BOOTSTRAPPED |
149 | QMap<QChar, QList<int> > regexCandidates; |
150 | #endif |
151 | QList<int> m_tokens; |
152 | QString m_buffer, m_lastMatchText; |
153 | int m_loc, m_lastNewlinePosition; |
154 | int m_lineno; |
155 | int m_debug; |
156 | QStringList m_tokenNames; |
157 | QMap<QString, QString> m_captured; |
158 | int m_maxMatchLen; |
159 | QString m_errorString; |
160 | QVector<QMap<int, QString> > m_names; //storage for match names |
161 | }; |
162 | |
163 | template <typename _Parser, typename _Table> |
164 | inline QVariant &QRegexParser<_Parser, _Table>::sym(int n) |
165 | { |
166 | return d->parseStack [d->tos + n - 1]; |
167 | } |
168 | |
169 | template <typename _Parser, typename _Table> |
170 | QRegexParser<_Parser, _Table>::~QRegexParser() |
171 | { |
172 | } |
173 | |
174 | template <typename _Parser, typename _Table> |
175 | bool QRegexParser<_Parser, _Table>::parse() |
176 | { |
177 | m_errorString.clear(); |
178 | reset(); |
179 | const int INITIAL_STATE = 0; |
180 | |
181 | d->tos = 0; |
182 | d->reallocateStack(); |
183 | |
184 | int act = d->stateStack[++d->tos] = INITIAL_STATE; |
185 | int token = -1; |
186 | |
187 | Q_FOREVER { |
188 | if (token == -1 && - _Table::TERMINAL_COUNT != _Table::action_index[act]) |
189 | token = nextToken(); |
190 | |
191 | act = _Table::t_action(act, token); |
192 | |
193 | if (d->stateStack[d->tos] == _Table::ACCEPT_STATE) |
194 | return true; |
195 | |
196 | else if (act > 0) { |
197 | if (++d->tos == d->stackSize) |
198 | d->reallocateStack(); |
199 | |
200 | d->parseStack[d->tos] = d->parseStack[d->tos - 1]; |
201 | d->stateStack[d->tos] = act; |
202 | token = -1; |
203 | } |
204 | |
205 | else if (act < 0) { |
206 | int r = - act - 1; |
207 | d->tos -= _Table::rhs[r]; |
208 | act = d->stateStack[d->tos++]; |
209 | if (!consumeRule(rule: r)) |
210 | return false; |
211 | act = d->stateStack[d->tos] = _Table::nt_action(act, _Table::lhs[r] - _Table::TERMINAL_COUNT); |
212 | } |
213 | |
214 | else break; |
215 | } |
216 | |
217 | setErrorString(QStringLiteral("Unknown token encountered")); |
218 | return false; |
219 | } |
220 | |
221 | template <typename _Parser, typename _Table> |
222 | QRegexParser<_Parser, _Table>::QRegexParser(int maxMatchLen) : d(new Data()), m_loc(0), m_lastNewlinePosition(0), m_lineno(1), m_debug(0), m_maxMatchLen(maxMatchLen) |
223 | { |
224 | REGEX re(QStringLiteral("\\[([_a-zA-Z][_0-9a-zA-Z]*)(,\\s*M)?\\](.+)$")); |
225 | #ifdef QT_BOOTSTRAPPED |
226 | REGEX nameMatch(QStringLiteral("\\((\\?<(.*)>).+\\)")); |
227 | nameMatch.setMinimal(true); |
228 | #else |
229 | re.optimize(); |
230 | #endif |
231 | QMap<QString, int> token_lookup; |
232 | QMap<int, QString> names; |
233 | for (int i = 1; i < _Table::lhs[0]; i++) { |
234 | const QString text = QLatin1String(_Table::spell[i]); |
235 | names.clear(); |
236 | #ifdef QT_BOOTSTRAPPED |
237 | if (re.indexIn(text) == 0) { |
238 | const QString token = re.cap(1); |
239 | const bool multiline = re.cap(2).length() > 0; |
240 | QString pattern = re.cap(3); |
241 | //We need to identify/remove any match names in the pattern, since |
242 | //QRegExp doesn't support that feature |
243 | int pos = 0, counter = 1, loc = nameMatch.indexIn(pattern, pos); |
244 | while (loc >= 0) { |
245 | const QString res = nameMatch.cap(2); |
246 | if (!res.isEmpty()) { |
247 | names.insert(counter, res); |
248 | pattern.remove(nameMatch.cap(1)); |
249 | } |
250 | pos += loc + nameMatch.matchedLength() - nameMatch.cap(1).length(); |
251 | loc = nameMatch.indexIn(pattern, pos); |
252 | ++counter; |
253 | } |
254 | //We need to use indexIn, but that will search past the location we |
255 | //pass in. So prepend '^' and use QRegExp::CaretAtOffset. |
256 | if (pattern.at(0) != QChar(QLatin1Char('^'))) |
257 | pattern.prepend(QChar(QLatin1Char('^'))); |
258 | #else |
259 | QRegularExpressionMatch match = re.match(subject: text, offset: 0, matchType: QRegularExpression::NormalMatch, matchOptions: QRegularExpression::DontCheckSubjectStringMatchOption); |
260 | if (match.hasMatch()) { |
261 | const QString token = match.captured(nth: 1); |
262 | const bool multiline = match.captured(nth: 2).length() > 0; |
263 | const QString pattern = match.captured(nth: 3); |
264 | #endif |
265 | m_tokenNames.append(t: token); |
266 | int index = i; |
267 | if (token_lookup.contains(key: token)) |
268 | index = token_lookup[token]; |
269 | else |
270 | token_lookup[token] = i; |
271 | #ifdef QT_BOOTSTRAPPED |
272 | if (multiline) |
273 | qWarning() << "The multiline grammar option is ignore in force_bootstrap mode."; |
274 | #endif |
275 | REGEX pat(pattern); |
276 | #ifndef QT_BOOTSTRAPPED |
277 | if (multiline) |
278 | pat.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); |
279 | #endif |
280 | if (!pat.isValid()) |
281 | qCritical() << "Pattern error for token #"<< i << "for"<< text << "pattern ="<< pat << ":"<< pat.errorString(); |
282 | else { |
283 | #ifndef QT_BOOTSTRAPPED |
284 | pat.optimize(); |
285 | int counter = 0; |
286 | const auto namedCaptureGroups = pat.namedCaptureGroups(); |
287 | for (const QString &name : namedCaptureGroups) { |
288 | if (!name.isEmpty()) |
289 | names.insert(key: counter, value: name); |
290 | ++counter; |
291 | } |
292 | #endif |
293 | m_names.append(t: names); |
294 | m_regexes.append(t: pat); |
295 | if (token.startsWith(s: QLatin1String("ignore"))) |
296 | m_tokens.append(t: -1); |
297 | else |
298 | m_tokens.append(t: index); |
299 | } |
300 | } else { |
301 | qCritical() << "Error parsing regex at token #"<< i << "for"<< text << "Invalid syntax"; |
302 | } |
303 | } |
304 | } |
305 | |
306 | template <typename _Parser, typename _Table> |
307 | void QRegexParser<_Parser, _Table>::setBuffer(const QString &buffer) |
308 | { |
309 | m_buffer = buffer; |
310 | } |
311 | |
312 | template <typename _Parser, typename _Table> |
313 | void QRegexParser<_Parser, _Table>::setBufferFromDevice(QIODevice *device) |
314 | { |
315 | QTextStream in(device); |
316 | m_buffer = in.readAll(); |
317 | } |
318 | |
319 | template <typename _Parser, typename _Table> |
320 | void QRegexParser<_Parser, _Table>::setDebug() |
321 | { |
322 | m_debug = true; |
323 | for (int r = 0; r < _Table::RULE_COUNT; ++r) |
324 | { |
325 | int ridx = _Table::rule_index[r]; |
326 | int _rhs = _Table::rhs[r]; |
327 | qDebug("%3d) %s ::=", r + 1, _Table::spell[_Table::rule_info[ridx]]); |
328 | ++ridx; |
329 | for (int i = ridx; i < ridx + _rhs; ++i) |
330 | { |
331 | int symbol = _Table::rule_info[i]; |
332 | if (symbol > 0 && symbol < _Table::lhs[0]) |
333 | qDebug(msg: " token_%s (pattern = %s)",qPrintable(m_tokenNames[symbol-1]),qPrintable(m_regexes[symbol-1].pattern())); |
334 | else if (const char *name = _Table::spell[symbol]) |
335 | qDebug(msg: " %s", name); |
336 | else |
337 | qDebug(msg: " #%d", symbol); |
338 | } |
339 | qDebug(); |
340 | } |
341 | } |
342 | |
343 | template <typename _Parser, typename _Table> |
344 | int QRegexParser<_Parser, _Table>::nextToken() |
345 | { |
346 | static const REGEX newline(QLatin1String("(\\n)")); |
347 | int token = -1; |
348 | while (token < 0) |
349 | { |
350 | if (m_loc == m_buffer.size()) |
351 | return _Table::EOF_SYMBOL; |
352 | |
353 | //Check m_lastMatchText for newlines and update m_lineno |
354 | //This isn't necessary, but being able to provide the line # and character # |
355 | //where the match is failing sure makes building/debugging grammars easier. |
356 | #ifdef QT_BOOTSTRAPPED |
357 | int loc = 0, pos = newline.indexIn(m_lastMatchText, loc); |
358 | while (pos >= 0) { |
359 | m_lineno++; |
360 | loc += pos + 1; |
361 | m_lastNewlinePosition += pos + 1; |
362 | pos = newline.indexIn(m_lastMatchText, loc); |
363 | } |
364 | #else //QT_BOOTSTRAPPED |
365 | QRegularExpressionMatchIterator matches = newline.globalMatch(subject: m_lastMatchText); |
366 | while (matches.hasNext()) { |
367 | m_lineno++; |
368 | QRegularExpressionMatch match = matches.next(); |
369 | if (!matches.hasNext()) |
370 | m_lastNewlinePosition += match.capturedEnd(); |
371 | } |
372 | #endif //!QT_BOOTSTRAPPED |
373 | if (m_debug) { |
374 | qDebug(); |
375 | qDebug() << "nextToken loop, line ="<< m_lineno |
376 | << "line position ="<< m_loc - m_lastNewlinePosition |
377 | << "next 5 characters ="<< escapeString(s: m_buffer.mid(position: m_loc, n: 5)); |
378 | } |
379 | int best = -1, maxLen = -1; |
380 | #ifndef QT_BOOTSTRAPPED |
381 | QRegularExpressionMatch bestRegex; |
382 | #endif |
383 | |
384 | //Find the longest match. |
385 | //If more than one are the same (longest) length, return the first one in |
386 | //the order defined. |
387 | QList<MatchCandidate> candidates; |
388 | #ifndef QT_BOOTSTRAPPED |
389 | { |
390 | //We used PCRE's PartialMatch to eliminate most of the regexes by the first |
391 | //character, so we keep a regexCandidates map with the list of possible regexes |
392 | //based on initial characters found so far. |
393 | const QChar nextChar = m_buffer.at(i: m_loc); |
394 | //Populate the list if we haven't seeen this character before |
395 | if (!regexCandidates.contains(key: nextChar)) { |
396 | # if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0)) |
397 | const QStringRef tmp = m_buffer.midRef(position: m_loc,n: 1); |
398 | # else |
399 | const QString tmp = m_buffer.mid(m_loc,1); |
400 | # endif |
401 | int i = 0; |
402 | regexCandidates[nextChar] = QList<int>(); |
403 | for (const QRegularExpression &re : qAsConst(t&: m_regexes)) |
404 | { |
405 | QRegularExpressionMatch match = re.match(subjectRef: tmp, offset: 0, matchType: QRegularExpression::PartialPreferFirstMatch, matchOptions: QRegularExpression::DontCheckSubjectStringMatchOption); |
406 | //qDebug() << nextChar << tmp << match.hasMatch() << match.hasPartialMatch() << re.pattern(); |
407 | if (match.hasMatch() || match.hasPartialMatch()) |
408 | regexCandidates[nextChar] << i; |
409 | i++; |
410 | } |
411 | } |
412 | const auto indices = regexCandidates.value(akey: nextChar); |
413 | for (int i : indices) |
414 | { |
415 | //Seems like I should be able to run the regex on the entire string, but performance is horrible |
416 | //unless I use a substring. |
417 | //QRegularExpressionMatch match = m_regexes[i].match(m_buffer, m_loc, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption); |
418 | # if (QT_VERSION >= QT_VERSION_CHECK(5, 5, 0)) |
419 | QRegularExpressionMatch match = m_regexes.at(i).match(subjectRef: m_buffer.midRef(position: m_loc, n: m_maxMatchLen), offset: 0, matchType: QRegularExpression::NormalMatch, matchOptions: QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption); |
420 | # else |
421 | QRegularExpressionMatch match = m_regexes.at(i).match(m_buffer.mid(m_loc, m_maxMatchLen), 0, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption | QRegularExpression::DontCheckSubjectStringMatchOption); |
422 | # endif |
423 | if (match.hasMatch()) { |
424 | if (m_debug) |
425 | candidates << MatchCandidate(m_tokenNames[i], match.captured(), i); |
426 | if (match.capturedLength() > maxLen) { |
427 | best = i; |
428 | maxLen = match.capturedLength(); |
429 | bestRegex = match; |
430 | } |
431 | } |
432 | } |
433 | } |
434 | #else |
435 | { |
436 | int i = 0; |
437 | for (const QRegExp &r : qAsConst(m_regexes)) |
438 | { |
439 | if (r.indexIn(m_buffer, m_loc, QRegExp::CaretAtOffset) == m_loc) { |
440 | if (m_debug) |
441 | candidates << MatchCandidate(m_tokenNames[i], r.cap(0), i); |
442 | if (r.matchedLength() > maxLen) { |
443 | best = i; |
444 | maxLen = r.matchedLength(); |
445 | } |
446 | } |
447 | ++i; |
448 | } |
449 | } |
450 | #endif |
451 | if (best < 0) { |
452 | setErrorString(QLatin1String("Error generating tokens from file, next characters >%1<").arg(args: m_buffer.midRef(position: m_loc, n: 15))); |
453 | return -1; |
454 | } else { |
455 | const QMap<int, QString> &map = m_names.at(i: best); |
456 | if (!map.isEmpty()) |
457 | m_captured.clear(); |
458 | for (auto iter = map.cbegin(), end = map.cend(); iter != end; ++iter) { |
459 | #ifdef QT_BOOTSTRAPPED |
460 | m_captured.insert(iter.value(), m_regexes.at(best).cap(iter.key())); |
461 | #else |
462 | m_captured.insert(key: iter.value(), value: bestRegex.captured(nth: iter.key())); |
463 | #endif |
464 | } |
465 | if (m_debug) { |
466 | qDebug() << "Match candidates:"; |
467 | for (const MatchCandidate &m : qAsConst(t&: candidates)) { |
468 | QLatin1String result = m.index == best ? QLatin1String(" * ") : QLatin1String( " "); |
469 | qDebug() << qPrintable(result) << qPrintable(m.name) << qPrintable(escapeString(m.matchText)); |
470 | } |
471 | } |
472 | m_loc += maxLen; |
473 | if (m_tokens.at(i: best) >= 0) |
474 | token = m_tokens.at(i: best); |
475 | #ifdef QT_BOOTSTRAPPED |
476 | m_lastMatchText = m_regexes.at(best).cap(0); |
477 | #else |
478 | m_lastMatchText = bestRegex.captured(nth: 0); |
479 | #endif |
480 | } |
481 | } |
482 | return token; |
483 | } |
484 | |
485 | QT_END_NAMESPACE |
486 | |
487 | #endif // QREGEXPARSER_H |
488 |
Warning: That file was not part of the compilation database. It may have many parsing errors.