1 | /* |
2 | SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de> |
3 | SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | // BEGIN includes |
9 | #include "kateregexpsearch.h" |
10 | |
11 | #include <ktexteditor/document.h> |
12 | // END includes |
13 | |
// Debug tracing switch: uncomment the define below to make FAST_DEBUG()
// stream its argument to qCDebug(LOG_KTE). While it stays commented out,
// FAST_DEBUG() expands to nothing.
// #define FAST_DEBUG_ENABLE

#if defined(FAST_DEBUG_ENABLE)
#define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
#else
#define FAST_DEBUG(x)
#endif
22 | |
23 | class KateRegExpSearch::ReplacementStream |
24 | { |
25 | public: |
26 | struct counter { |
27 | counter(int value, int minWidth) |
28 | : value(value) |
29 | , minWidth(minWidth) |
30 | { |
31 | } |
32 | |
33 | const int value; |
34 | const int minWidth; |
35 | }; |
36 | |
37 | struct cap { |
38 | cap(int n) |
39 | : n(n) |
40 | { |
41 | } |
42 | |
43 | const int n; |
44 | }; |
45 | |
46 | enum CaseConversion { |
47 | upperCase, ///< \U ... uppercase from now on |
48 | upperCaseFirst, ///< \u ... uppercase the first letter |
49 | lowerCase, ///< \L ... lowercase from now on |
50 | lowerCaseFirst, ///< \l ... lowercase the first letter |
51 | keepCase ///< \E ... back to original case |
52 | }; |
53 | |
54 | public: |
55 | ReplacementStream(const QStringList &capturedTexts); |
56 | |
57 | QString str() const |
58 | { |
59 | return m_str; |
60 | } |
61 | |
62 | ReplacementStream &operator<<(const QString &); |
63 | ReplacementStream &operator<<(const counter &); |
64 | ReplacementStream &operator<<(const cap &); |
65 | ReplacementStream &operator<<(CaseConversion); |
66 | |
67 | private: |
68 | const QStringList m_capturedTexts; |
69 | CaseConversion m_caseConversion; |
70 | QString m_str; |
71 | }; |
72 | |
73 | KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts) |
74 | : m_capturedTexts(capturedTexts) |
75 | , m_caseConversion(keepCase) |
76 | { |
77 | } |
78 | |
79 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str) |
80 | { |
81 | switch (m_caseConversion) { |
82 | case upperCase: |
83 | // Copy as uppercase |
84 | m_str.append(s: str.toUpper()); |
85 | break; |
86 | |
87 | case upperCaseFirst: |
88 | if (str.length() > 0) { |
89 | m_str.append(c: str.at(i: 0).toUpper()); |
90 | m_str.append(v: QStringView(str).mid(pos: 1)); |
91 | m_caseConversion = keepCase; |
92 | } |
93 | break; |
94 | |
95 | case lowerCase: |
96 | // Copy as lowercase |
97 | m_str.append(s: str.toLower()); |
98 | break; |
99 | |
100 | case lowerCaseFirst: |
101 | if (str.length() > 0) { |
102 | m_str.append(c: str.at(i: 0).toLower()); |
103 | m_str.append(v: QStringView(str).mid(pos: 1)); |
104 | m_caseConversion = keepCase; |
105 | } |
106 | break; |
107 | |
108 | case keepCase: // FALLTHROUGH |
109 | default: |
110 | // Copy unmodified |
111 | m_str.append(s: str); |
112 | break; |
113 | } |
114 | |
115 | return *this; |
116 | } |
117 | |
118 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c) |
119 | { |
120 | // Zero padded counter value |
121 | m_str.append(QStringLiteral("%1" ).arg(a: c.value, fieldWidth: c.minWidth, base: 10, fillChar: QLatin1Char('0'))); |
122 | |
123 | return *this; |
124 | } |
125 | |
126 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap) |
127 | { |
128 | if (0 <= cap.n && cap.n < m_capturedTexts.size()) { |
129 | (*this) << m_capturedTexts[cap.n]; |
130 | } else { |
131 | // Insert just the number to be consistent with QRegExp ("\c" becomes "c") |
132 | m_str.append(s: QString::number(cap.n)); |
133 | } |
134 | |
135 | return *this; |
136 | } |
137 | |
138 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion) |
139 | { |
140 | m_caseConversion = caseConversion; |
141 | |
142 | return *this; |
143 | } |
144 | |
145 | // BEGIN d'tor, c'tor |
146 | // |
147 | // KateSearch Constructor |
148 | // |
149 | KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document) |
150 | : m_document(document) |
151 | { |
152 | } |
153 | |
154 | // helper structs for captures re-construction |
// One position seen in two ways: as an index into the concatenated search
// text and as a document line/column. Members default to 0 so that a
// default-constructed cursor never carries indeterminate values.
struct TwoViewCursor {
    int index = 0; // offset into the concatenated text
    int line = 0; // document line
    int col = 0; // column within that line
};
160 | |
// Start/end offsets of one capture group inside the concatenated search text.
// -1 is the "group did not capture" sentinel used by search(), so it is also
// the default for a freshly constructed pair.
struct IndexPair {
    int openIndex = -1; // offset of the first captured character
    int closeIndex = -1; // offset one past the last captured character
};
165 | |
166 | QList<KTextEditor::Range> |
167 | KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options) |
168 | { |
169 | // Save regexes to avoid reconstructing regexes all the time |
170 | static QRegularExpression preRegex; |
171 | static QRegularExpression repairedRegex; |
172 | |
173 | // Returned if no matches are found |
174 | QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid()); |
175 | |
176 | // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the |
177 | // this method returning here if 'pattern' is empty. |
178 | if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) { |
179 | return noResult; |
180 | } |
181 | |
182 | // Always enable Unicode support |
183 | options |= QRegularExpression::UseUnicodePropertiesOption; |
184 | |
185 | if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) { |
186 | preRegex = QRegularExpression(pattern, options); |
187 | } |
188 | |
189 | // If repairPattern() is called on an invalid regex pattern it may cause asserts |
190 | // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern |
191 | // expects at least one character after a '\') |
192 | if (!preRegex.isValid()) { |
193 | return noResult; |
194 | } |
195 | |
196 | // detect pattern type (single- or mutli-line) |
197 | bool stillMultiLine; |
198 | const QString repairedPattern = repairPattern(pattern, stillMultiLine); |
199 | |
200 | // Enable multiline mode, so that the ^ and $ metacharacters in the pattern |
201 | // are allowed to match, respectively, immediately after and immediately |
202 | // before any newline in the subject string, as well as at the very beginning |
203 | // and at the very end of the subject string (see QRegularExpression docs). |
204 | // |
205 | // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange |
206 | // ends in the middle of a line, then a '$' won't match at that position. And |
207 | // matches that are out of the inputRange are rejected. |
208 | if (stillMultiLine) { |
209 | options |= QRegularExpression::MultilineOption; |
210 | } |
211 | |
212 | // check if anything changed at all |
213 | if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) { |
214 | repairedRegex.setPattern(repairedPattern); |
215 | repairedRegex.setPatternOptions(options); |
216 | } |
217 | if (!repairedRegex.isValid()) { |
218 | return noResult; |
219 | } |
220 | |
221 | const int rangeStartLine = inputRange.start().line(); |
222 | const int rangeStartCol = inputRange.start().column(); |
223 | |
224 | const int rangeEndLine = inputRange.end().line(); |
225 | const int rangeEndCol = inputRange.end().column(); |
226 | |
227 | if (stillMultiLine) { |
228 | const int rangeLineCount = rangeEndLine - rangeStartLine + 1; |
229 | FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")" ); |
230 | |
231 | const int docLineCount = m_document->lines(); |
232 | // nothing to do... |
233 | if (rangeStartLine >= docLineCount) { |
234 | return noResult; |
235 | } |
236 | |
237 | QList<int> lineLens(rangeLineCount); |
238 | int maxMatchOffset = 0; |
239 | |
240 | // all lines in the input range |
241 | QString wholeRange; |
242 | for (int i = 0; i < rangeLineCount; ++i) { |
243 | const int docLineIndex = rangeStartLine + i; |
244 | if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index |
245 | return noResult; |
246 | } |
247 | |
248 | const QString textLine = m_document->line(line: docLineIndex); |
249 | lineLens[i] = textLine.length(); |
250 | wholeRange.append(s: textLine); |
251 | |
252 | // This check is needed as some parts in vimode rely on this behaviour. |
253 | // We add an '\n' as a delimiter between lines in the range; but never after the |
254 | // last line as that would add an '\n' that isn't there in the original text, |
255 | // and can skew search results or hit an assert when accessing lineLens later |
256 | // in the code. |
257 | if (i != (rangeLineCount - 1)) { |
258 | wholeRange.append(c: QLatin1Char('\n')); |
259 | } |
260 | |
261 | // lineLens.at(i) + 1, because '\n' was added |
262 | maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1; |
263 | |
264 | FAST_DEBUG(" line" << i << "has length" << lineLens.at(i)); |
265 | } |
266 | |
267 | FAST_DEBUG("Max. match offset" << maxMatchOffset); |
268 | |
269 | QRegularExpressionMatch match; |
270 | bool found = false; |
271 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: wholeRange, offset: rangeStartCol); |
272 | |
273 | if (backwards) { |
274 | while (iter.hasNext()) { |
275 | QRegularExpressionMatch curMatch = iter.next(); |
276 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
277 | match.swap(other&: curMatch); |
278 | found = true; |
279 | } |
280 | } |
281 | } else { /* forwards */ |
282 | QRegularExpressionMatch curMatch; |
283 | if (iter.hasNext()) { |
284 | curMatch = iter.next(); |
285 | } |
286 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
287 | match.swap(other&: curMatch); |
288 | found = true; |
289 | } |
290 | } |
291 | |
292 | if (!found) { |
293 | // no match |
294 | FAST_DEBUG("not found" ); |
295 | return noResult; |
296 | } |
297 | |
298 | // Capture groups: save opening and closing indices and build a map, |
299 | // the correct values will be written into it later |
300 | QMap<int, TwoViewCursor *> indicesToCursors; |
301 | const int numCaptures = repairedRegex.captureCount(); |
302 | QList<IndexPair> indexPairs(numCaptures + 1); |
303 | for (int c = 0; c <= numCaptures; ++c) { |
304 | const int openIndex = match.capturedStart(nth: c); |
305 | IndexPair &pair = indexPairs[c]; |
306 | if (openIndex == -1) { |
307 | // An invalid index indicates an empty capture group |
308 | pair.openIndex = -1; |
309 | pair.closeIndex = -1; |
310 | FAST_DEBUG("capture []" ); |
311 | } else { |
312 | const int closeIndex = match.capturedEnd(nth: c); |
313 | pair.openIndex = openIndex; |
314 | pair.closeIndex = closeIndex; |
315 | FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]" ); |
316 | |
317 | // each key no more than once |
318 | if (!indicesToCursors.contains(key: openIndex)) { |
319 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
320 | twoViewCursor->index = openIndex; |
321 | indicesToCursors.insert(key: openIndex, value: twoViewCursor); |
322 | FAST_DEBUG(" capture group start index added: " << openIndex); |
323 | } |
324 | if (!indicesToCursors.contains(key: closeIndex)) { |
325 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
326 | twoViewCursor->index = closeIndex; |
327 | indicesToCursors.insert(key: closeIndex, value: twoViewCursor); |
328 | FAST_DEBUG(" capture group end index added: " << closeIndex); |
329 | } |
330 | } |
331 | } |
332 | |
333 | // find out where they belong |
334 | int curRelLine = 0; |
335 | int curRelCol = 0; |
336 | int curRelIndex = 0; |
337 | |
338 | for (TwoViewCursor *twoViewCursor : std::as_const(t&: indicesToCursors)) { |
339 | // forward to index, save line/col |
340 | const int index = twoViewCursor->index; |
341 | FAST_DEBUG("resolving position" << index); |
342 | |
343 | while (curRelIndex <= index) { |
344 | FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); |
345 | |
346 | const int curRelLineLen = lineLens.at(i: curRelLine); |
347 | const int curLineRemainder = curRelLineLen - curRelCol; |
348 | const int lineFeedIndex = curRelIndex + curLineRemainder; |
349 | if (index <= lineFeedIndex) { |
350 | if (index == lineFeedIndex) { |
351 | // on this line _at_ line feed |
352 | FAST_DEBUG(" on line feed" ); |
353 | const int absLine = curRelLine + rangeStartLine; |
354 | twoViewCursor->line = absLine; |
355 | twoViewCursor->col = curRelLineLen; |
356 | |
357 | // advance to next line |
358 | const int advance = (index - curRelIndex) + 1; |
359 | ++curRelLine; |
360 | curRelCol = 0; |
361 | curRelIndex += advance; |
362 | } else { // index < lineFeedIndex |
363 | // on this line _before_ line feed |
364 | FAST_DEBUG(" before line feed" ); |
365 | const int diff = (index - curRelIndex); |
366 | const int absLine = curRelLine + rangeStartLine; |
367 | const int absCol = curRelCol + diff; |
368 | twoViewCursor->line = absLine; |
369 | twoViewCursor->col = absCol; |
370 | |
371 | // advance on same line |
372 | const int advance = diff + 1; |
373 | curRelCol += advance; |
374 | curRelIndex += advance; |
375 | } |
376 | FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")" ); |
377 | } else { // if (index > lineFeedIndex) |
378 | // not on this line |
379 | // advance to next line |
380 | FAST_DEBUG(" not on this line" ); |
381 | ++curRelLine; |
382 | curRelCol = 0; |
383 | const int advance = curLineRemainder + 1; |
384 | curRelIndex += advance; |
385 | } |
386 | } |
387 | } |
388 | |
389 | // build result array |
390 | QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid()); |
391 | for (int y = 0; y <= numCaptures; y++) { |
392 | IndexPair &pair = indexPairs[y]; |
393 | if (!(pair.openIndex == -1 || pair.closeIndex == -1)) { |
394 | const TwoViewCursor *const openCursors = indicesToCursors.value(key: pair.openIndex); |
395 | const TwoViewCursor *const closeCursors = indicesToCursors.value(key: pair.closeIndex); |
396 | const int startLine = openCursors->line; |
397 | const int startCol = openCursors->col; |
398 | const int endLine = closeCursors->line; |
399 | const int endCol = closeCursors->col; |
400 | FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")" ); |
401 | result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); |
402 | } |
403 | } |
404 | |
405 | // free structs allocated for indicesToCursors |
406 | qDeleteAll(c: indicesToCursors); |
407 | |
408 | return result; |
409 | } else { |
410 | // single-line regex search (forwards and backwards) |
411 | const int rangeStartCol = inputRange.start().column(); |
412 | const uint rangeEndCol = inputRange.end().column(); |
413 | |
414 | const int rangeStartLine = inputRange.start().line(); |
415 | const int rangeEndLine = inputRange.end().line(); |
416 | |
417 | const int forInit = backwards ? rangeEndLine : rangeStartLine; |
418 | |
419 | const int forInc = backwards ? -1 : +1; |
420 | |
421 | FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine)); |
422 | |
423 | for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) { |
424 | if (j < 0 || m_document->lines() <= j) { |
425 | FAST_DEBUG("searchText | line " << j << ": no" ); |
426 | return noResult; |
427 | } |
428 | |
429 | const QString textLine = m_document->line(line: j); |
430 | |
431 | const int offset = (j == rangeStartLine) ? rangeStartCol : 0; |
432 | const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length(); |
433 | |
434 | bool found = false; |
435 | |
436 | QRegularExpressionMatch match; |
437 | |
438 | if (backwards) { |
439 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: textLine, offset); |
440 | while (iter.hasNext()) { |
441 | QRegularExpressionMatch curMatch = iter.next(); |
442 | if (curMatch.capturedEnd() <= endLineMaxOffset) { |
443 | match.swap(other&: curMatch); |
444 | found = true; |
445 | } |
446 | } |
447 | } else { |
448 | match = repairedRegex.match(subject: textLine, offset); |
449 | if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) { |
450 | found = true; |
451 | } |
452 | } |
453 | |
454 | if (found) { |
455 | FAST_DEBUG("line " << j << ": yes" ); |
456 | |
457 | // build result array |
458 | const int numCaptures = repairedRegex.captureCount(); |
459 | QList<KTextEditor::Range> result(numCaptures + 1); |
460 | result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd()); |
461 | |
462 | FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart << ")..(" << j << ", " << match.capturedEnd() << ")" ); |
463 | |
464 | for (int y = 1; y <= numCaptures; ++y) { |
465 | const int openIndex = match.capturedStart(nth: y); |
466 | |
467 | if (openIndex == -1) { |
468 | result[y] = KTextEditor::Range::invalid(); |
469 | |
470 | FAST_DEBUG("capture []" ); |
471 | } else { |
472 | const int closeIndex = match.capturedEnd(nth: y); |
473 | |
474 | FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")" ); |
475 | |
476 | result[y] = KTextEditor::Range(j, openIndex, j, closeIndex); |
477 | } |
478 | } |
479 | return result; |
480 | } else { |
481 | FAST_DEBUG("searchText | line " << j << ": no" ); |
482 | } |
483 | } |
484 | } |
485 | return noResult; |
486 | } |
487 | |
488 | /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text) |
489 | { |
490 | return buildReplacement(text, capturedTexts: QStringList(), replacementCounter: 0, replacementGoodies: false); |
491 | } |
492 | |
493 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter) |
494 | { |
495 | return buildReplacement(text, capturedTexts, replacementCounter, replacementGoodies: true); |
496 | } |
497 | |
498 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies) |
499 | { |
500 | // get input |
501 | const int inputLen = text.length(); |
502 | int input = 0; // walker index |
503 | |
504 | // prepare output |
505 | ReplacementStream out(capturedTexts); |
506 | |
507 | while (input < inputLen) { |
508 | switch (text[input].unicode()) { |
509 | case L'\n': |
510 | out << text[input]; |
511 | input++; |
512 | break; |
513 | |
514 | case L'\\': |
515 | if (input + 1 >= inputLen) { |
516 | // copy backslash |
517 | out << text[input]; |
518 | input++; |
519 | break; |
520 | } |
521 | |
522 | switch (text[input + 1].unicode()) { |
523 | case L'0': // "\0000".."\0377" |
524 | if (input + 4 >= inputLen) { |
525 | out << ReplacementStream::cap(0); |
526 | input += 2; |
527 | } else { |
528 | bool stripAndSkip = false; |
529 | const ushort text_2 = text[input + 2].unicode(); |
530 | if ((text_2 >= L'0') && (text_2 <= L'3')) { |
531 | const ushort text_3 = text[input + 3].unicode(); |
532 | if ((text_3 >= L'0') && (text_3 <= L'7')) { |
533 | const ushort text_4 = text[input + 4].unicode(); |
534 | if ((text_4 >= L'0') && (text_4 <= L'7')) { |
535 | int digits[3]; |
536 | for (int i = 0; i < 3; i++) { |
537 | digits[i] = 7 - (L'7' - text[input + 2 + i].unicode()); |
538 | } |
539 | const int ch = 64 * digits[0] + 8 * digits[1] + digits[2]; |
540 | out << QChar(ch); |
541 | input += 5; |
542 | } else { |
543 | stripAndSkip = true; |
544 | } |
545 | } else { |
546 | stripAndSkip = true; |
547 | } |
548 | } else { |
549 | stripAndSkip = true; |
550 | } |
551 | |
552 | if (stripAndSkip) { |
553 | out << ReplacementStream::cap(0); |
554 | input += 2; |
555 | } |
556 | } |
557 | break; |
558 | |
559 | // single letter captures \x |
560 | case L'1': |
561 | case L'2': |
562 | case L'3': |
563 | case L'4': |
564 | case L'5': |
565 | case L'6': |
566 | case L'7': |
567 | case L'8': |
568 | case L'9': |
569 | out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode())); |
570 | input += 2; |
571 | break; |
572 | |
573 | // multi letter captures \{xxxx} |
574 | case L'{': { |
575 | // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124 |
576 | int capture = 0; |
577 | int captureSize = 2; |
578 | while ((input + captureSize) < inputLen) { |
579 | const ushort nextDigit = text[input + captureSize].unicode(); |
580 | if ((nextDigit >= L'0') && (nextDigit <= L'9')) { |
581 | capture = (10 * capture) + (9 - (L'9' - nextDigit)); |
582 | ++captureSize; |
583 | continue; |
584 | } |
585 | if (nextDigit == L'}') { |
586 | ++captureSize; |
587 | break; |
588 | } |
589 | break; |
590 | } |
591 | out << ReplacementStream::cap(capture); |
592 | input += captureSize; |
593 | break; |
594 | } |
595 | |
596 | case L'E': // FALLTHROUGH |
597 | case L'L': // FALLTHROUGH |
598 | case L'l': // FALLTHROUGH |
599 | case L'U': // FALLTHROUGH |
600 | case L'u': |
601 | if (!replacementGoodies) { |
602 | // strip backslash ("\?" -> "?") |
603 | out << text[input + 1]; |
604 | } else { |
605 | // handle case switcher |
606 | switch (text[input + 1].unicode()) { |
607 | case L'L': |
608 | out << ReplacementStream::lowerCase; |
609 | break; |
610 | |
611 | case L'l': |
612 | out << ReplacementStream::lowerCaseFirst; |
613 | break; |
614 | |
615 | case L'U': |
616 | out << ReplacementStream::upperCase; |
617 | break; |
618 | |
619 | case L'u': |
620 | out << ReplacementStream::upperCaseFirst; |
621 | break; |
622 | |
623 | case L'E': // FALLTHROUGH |
624 | default: |
625 | out << ReplacementStream::keepCase; |
626 | } |
627 | } |
628 | input += 2; |
629 | break; |
630 | |
631 | case L'#': |
632 | if (!replacementGoodies) { |
633 | // strip backslash ("\?" -> "?") |
634 | out << text[input + 1]; |
635 | input += 2; |
636 | } else { |
637 | // handle replacement counter |
638 | // eat and count all following hash marks |
639 | // each hash stands for a leading zero: \### will produces 001, 002, ... |
640 | int minWidth = 1; |
641 | while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) { |
642 | minWidth++; |
643 | } |
644 | out << ReplacementStream::counter(replacementCounter, minWidth); |
645 | input += 1 + minWidth; |
646 | } |
647 | break; |
648 | |
649 | case L'a': |
650 | out << QChar(0x07); |
651 | input += 2; |
652 | break; |
653 | |
654 | case L'f': |
655 | out << QChar(0x0c); |
656 | input += 2; |
657 | break; |
658 | |
659 | case L'n': |
660 | out << QChar(0x0a); |
661 | input += 2; |
662 | break; |
663 | |
664 | case L'r': |
665 | out << QChar(0x0d); |
666 | input += 2; |
667 | break; |
668 | |
669 | case L't': |
670 | out << QChar(0x09); |
671 | input += 2; |
672 | break; |
673 | |
674 | case L'v': |
675 | out << QChar(0x0b); |
676 | input += 2; |
677 | break; |
678 | |
679 | case L'x': // "\x0000".."\xffff" |
680 | if (input + 5 >= inputLen) { |
681 | // strip backslash ("\x" -> "x") |
682 | out << text[input + 1]; |
683 | input += 2; |
684 | } else { |
685 | bool stripAndSkip = false; |
686 | const ushort text_2 = text[input + 2].unicode(); |
687 | if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) { |
688 | const ushort text_3 = text[input + 3].unicode(); |
689 | if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) { |
690 | const ushort text_4 = text[input + 4].unicode(); |
691 | if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) { |
692 | const ushort text_5 = text[input + 5].unicode(); |
693 | if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f')) |
694 | || ((text_5 >= L'A') && (text_5 <= L'F'))) { |
695 | int digits[4]; |
696 | for (int i = 0; i < 4; i++) { |
697 | const ushort cur = text[input + 2 + i].unicode(); |
698 | if ((cur >= L'0') && (cur <= L'9')) { |
699 | digits[i] = 9 - (L'9' - cur); |
700 | } else if ((cur >= L'a') && (cur <= L'f')) { |
701 | digits[i] = 15 - (L'f' - cur); |
702 | } else { // if ((cur >= L'A') && (cur <= L'F'))) |
703 | digits[i] = 15 - (L'F' - cur); |
704 | } |
705 | } |
706 | |
707 | const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3]; |
708 | out << QChar(ch); |
709 | input += 6; |
710 | } else { |
711 | stripAndSkip = true; |
712 | } |
713 | } else { |
714 | stripAndSkip = true; |
715 | } |
716 | } else { |
717 | stripAndSkip = true; |
718 | } |
719 | } |
720 | |
721 | if (stripAndSkip) { |
722 | // strip backslash ("\x" -> "x") |
723 | out << text[input + 1]; |
724 | input += 2; |
725 | } |
726 | } |
727 | break; |
728 | |
729 | default: |
730 | // strip backslash ("\?" -> "?") |
731 | out << text[input + 1]; |
732 | input += 2; |
733 | } |
734 | break; |
735 | |
736 | default: |
737 | out << text[input]; |
738 | input++; |
739 | } |
740 | } |
741 | |
742 | return out.str(); |
743 | } |
744 | |
745 | QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine) |
746 | { |
747 | // '\s' can make a pattern multi-line, it's replaced here with '[ \t]'; |
748 | // besides \s, the following characters can make a pattern multi-line: |
749 | // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0??? |
750 | // a multi-line pattern must not pass as single-line, the other |
751 | // way around will just result in slower searches and is therefore |
752 | // not as critical |
753 | |
754 | const int inputLen = pattern.length(); |
755 | const QStringView patternView{pattern}; |
756 | |
757 | // prepare output |
758 | QString output; |
759 | output.reserve(asize: 2 * inputLen + 1); // twice should be enough for the average case |
760 | |
761 | // parser state |
762 | bool insideClass = false; |
763 | |
764 | stillMultiLine = false; |
765 | int input = 0; |
766 | while (input < inputLen) { |
767 | if (insideClass) { |
768 | // wait for closing, unescaped ']' |
769 | switch (pattern[input].unicode()) { |
770 | case L'\\': |
771 | switch (pattern[input + 1].unicode()) { |
772 | case L'x': |
773 | if (input + 5 < inputLen) { |
774 | // copy "\x????" unmodified |
775 | output.append(v: patternView.mid(pos: input, n: 6)); |
776 | input += 6; |
777 | } else { |
778 | // copy "\x" unmodified |
779 | output.append(v: patternView.mid(pos: input, n: 2)); |
780 | input += 2; |
781 | } |
782 | stillMultiLine = true; |
783 | break; |
784 | |
785 | case L'0': |
786 | if (input + 4 < inputLen) { |
787 | // copy "\0???" unmodified |
788 | output.append(v: patternView.mid(pos: input, n: 5)); |
789 | input += 5; |
790 | } else { |
791 | // copy "\0" unmodified |
792 | output.append(v: patternView.mid(pos: input, n: 2)); |
793 | input += 2; |
794 | } |
795 | stillMultiLine = true; |
796 | break; |
797 | |
798 | case L's': |
799 | // replace "\s" with "[ \t]" |
800 | output.append(s: QLatin1String(" \\t" )); |
801 | input += 2; |
802 | break; |
803 | |
804 | case L'n': |
805 | stillMultiLine = true; |
806 | // FALLTROUGH |
807 | Q_FALLTHROUGH(); |
808 | |
809 | default: |
810 | // copy "\?" unmodified |
811 | output.append(v: patternView.mid(pos: input, n: 2)); |
812 | input += 2; |
813 | } |
814 | break; |
815 | |
816 | case L']': |
817 | // copy "]" unmodified |
818 | insideClass = false; |
819 | output.append(c: pattern[input]); |
820 | ++input; |
821 | break; |
822 | |
823 | default: |
824 | // copy "?" unmodified |
825 | output.append(c: pattern[input]); |
826 | ++input; |
827 | } |
828 | } else { |
829 | switch (pattern[input].unicode()) { |
830 | case L'\\': |
831 | switch (pattern[input + 1].unicode()) { |
832 | case L'x': |
833 | if (input + 5 < inputLen) { |
834 | // copy "\x????" unmodified |
835 | output.append(v: patternView.mid(pos: input, n: 6)); |
836 | input += 6; |
837 | } else { |
838 | // copy "\x" unmodified |
839 | output.append(v: patternView.mid(pos: input, n: 2)); |
840 | input += 2; |
841 | } |
842 | stillMultiLine = true; |
843 | break; |
844 | |
845 | case L'0': |
846 | if (input + 4 < inputLen) { |
847 | // copy "\0???" unmodified |
848 | output.append(v: patternView.mid(pos: input, n: 5)); |
849 | input += 5; |
850 | } else { |
851 | // copy "\0" unmodified |
852 | output.append(v: patternView.mid(pos: input, n: 2)); |
853 | input += 2; |
854 | } |
855 | stillMultiLine = true; |
856 | break; |
857 | |
858 | case L's': |
859 | // replace "\s" with "[ \t]" |
860 | output.append(s: QLatin1String("[ \\t]" )); |
861 | input += 2; |
862 | break; |
863 | |
864 | case L'n': |
865 | stillMultiLine = true; |
866 | // FALLTROUGH |
867 | Q_FALLTHROUGH(); |
868 | default: |
869 | // copy "\?" unmodified |
870 | output.append(v: patternView.mid(pos: input, n: 2)); |
871 | input += 2; |
872 | } |
873 | break; |
874 | |
875 | case L'[': |
876 | // copy "[" unmodified |
877 | insideClass = true; |
878 | output.append(c: pattern[input]); |
879 | ++input; |
880 | break; |
881 | |
882 | default: |
883 | // copy "?" unmodified |
884 | output.append(c: pattern[input]); |
885 | ++input; |
886 | } |
887 | } |
888 | } |
889 | return output; |
890 | } |
891 | |
// Drop the debug helper macros again now that the file is done with them
#if defined(FAST_DEBUG_ENABLE)
#undef FAST_DEBUG_ENABLE
#endif
#undef FAST_DEBUG
897 | |