| 1 | // Copyright (C) 2016 The Qt Company Ltd. | 
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only | 
| 3 |  | 
| 4 | #include "qregexp.h" | 
| 5 |  | 
| 6 | #include "qalgorithms.h" | 
| 7 | #include "qbitarray.h" | 
| 8 | #include "qcache.h" | 
| 9 | #include "qdatastream.h" | 
| 10 | #include "qdebug.h" | 
| 11 | #include "qhashfunctions.h" | 
| 12 | #include "qlist.h" | 
| 13 | #include "qmap.h" | 
| 14 | #include "qmutex.h" | 
| 15 | #include "qstring.h" | 
| 16 | #include "qstringlist.h" | 
| 17 | #include "qstringmatcher.h" | 
| 18 | #include "private/qlocking_p.h" | 
| 19 | #include "qvarlengtharray.h" | 
| 20 |  | 
| 21 | #include <limits.h> | 
| 22 | #include <algorithm> | 
| 23 | #include <optional> | 
| 24 |  | 
| 25 | QT_BEGIN_NAMESPACE | 
| 26 |  | 
| 27 | // Error strings for the regexp parser; each literal is wrapped in QT_TRANSLATE_NOOP with the "QRegExp" context. | 
| 28 | #define RXERR_OK         QT_TRANSLATE_NOOP("QRegExp", "no error occurred") | 
| 29 | #define RXERR_DISABLED   QT_TRANSLATE_NOOP("QRegExp", "disabled feature used") | 
| 30 | #define RXERR_CHARCLASS  QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax") | 
| 31 | #define RXERR_LOOKAHEAD  QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax") | 
| 32 | #define RXERR_LOOKBEHIND QT_TRANSLATE_NOOP("QRegExp", "lookbehinds not supported, see QTBUG-2371") | 
| 33 | #define RXERR_REPETITION QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax") | 
| 34 | #define RXERR_OCTAL      QT_TRANSLATE_NOOP("QRegExp", "invalid octal value") | 
| 35 | #define RXERR_LEFTDELIM  QT_TRANSLATE_NOOP("QRegExp", "missing left delim") | 
| 36 | #define RXERR_END        QT_TRANSLATE_NOOP("QRegExp", "unexpected end") | 
| 37 | #define RXERR_LIMIT      QT_TRANSLATE_NOOP("QRegExp", "met internal limit") | 
| 38 | #define RXERR_INTERVAL   QT_TRANSLATE_NOOP("QRegExp", "invalid interval") | 
| 39 | #define RXERR_CATEGORY   QT_TRANSLATE_NOOP("QRegExp", "invalid category") | 
| 40 |  | 
| 41 | /*! | 
| 42 |     \class QRegExp | 
| 43 |     \inmodule QtCore5Compat | 
| 44 |     \reentrant | 
| 45 |     \brief The QRegExp class provides pattern matching using regular expressions. | 
| 46 |  | 
| 47 |     \ingroup tools | 
| 48 |     \ingroup shared | 
| 49 |  | 
| 50 |     \keyword regular expression | 
| 51 |  | 
| 52 |     This class is deprecated in Qt 6. Please use QRegularExpression instead | 
| 53 |     for all new code. For guidelines on porting old code from QRegExp to | 
| 54 |     QRegularExpression, see \l{Porting to QRegularExpression}. | 
| 55 |  | 
| 56 |     A regular expression, or "regexp", is a pattern for matching | 
| 57 |     substrings in a text. This is useful in many contexts, e.g., | 
| 58 |  | 
| 59 |     \table | 
| 60 |     \row \li Validation | 
| 61 |          \li A regexp can test whether a substring meets some criteria, | 
| 62 |          e.g. is an integer or contains no whitespace. | 
| 63 |     \row \li Searching | 
| 64 |          \li A regexp provides more powerful pattern matching than | 
| 65 |          simple substring matching, e.g., match one of the words | 
| 66 |          \e{mail}, \e{letter} or \e{correspondence}, but none of the | 
| 67 |          words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc. | 
| 68 |     \row \li Search and Replace | 
| 69 |          \li A regexp can replace all occurrences of a substring with a | 
| 70 |          different substring, e.g., replace all occurrences of \e{&} | 
| 71 |          with \e{\&amp;} except where the \e{&} is already followed by | 
| 72 |          an \e{amp;}. | 
| 73 |     \row \li String Splitting | 
| 74 |          \li A regexp can be used to identify where a string should be | 
| 75 |          split apart, e.g. splitting tab-delimited strings. | 
| 76 |     \endtable | 
| 77 |  | 
| 78 |     This documentation presents a brief introduction to regexps, a | 
| 79 |     description of Qt's regexp language, some examples, and the function | 
| 80 |     documentation itself. QRegExp is modeled on Perl's regexp | 
| 81 |     language. It fully supports Unicode. QRegExp can also be used in a | 
| 82 |     simpler, \e{wildcard mode} that is similar to the functionality | 
| 83 |     found in command shells. The syntax rules used by QRegExp can be | 
| 84 |     changed with setPatternSyntax(). In particular, the pattern syntax | 
| 85 |     can be set to QRegExp::FixedString, which means the pattern to be | 
| 86 |     matched is interpreted as a plain string, i.e., special characters | 
| 87 |     (e.g., backslash) are not escaped. | 
| 88 |  | 
| 89 |     A good text on regexps is \e {Mastering Regular Expressions} | 
| 90 |     (Third Edition) by Jeffrey E. F.  Friedl, ISBN 0-596-52812-4. | 
| 91 |  | 
| 92 |     \note In Qt 5, the new QRegularExpression class provides a Perl | 
| 93 |     compatible implementation of regular expressions and is recommended | 
| 94 |     in place of QRegExp. | 
| 95 |  | 
| 96 |     \section1 Introduction | 
| 97 |  | 
| 98 |     Regexps are built up from expressions, quantifiers, and | 
| 99 |     assertions. The simplest expression is a character, e.g. \b{x} | 
| 100 |     or \b{5}. An expression can also be a set of characters | 
| 101 |     enclosed in square brackets. \b{[ABCD]} will match an \b{A} | 
| 102 |     or a \b{B} or a \b{C} or a \b{D}. We can write this same | 
| 103 |     expression as \b{[A-D]}, and an expression to match any | 
| 104 |     capital letter in the English alphabet is written as | 
| 105 |     \b{[A-Z]}. | 
| 106 |  | 
| 107 |     A quantifier specifies the number of occurrences of an expression | 
| 108 |     that must be matched. \b{x{1,1}} means match one and only one | 
| 109 |     \b{x}. \b{x{1,5}} means match a sequence of \b{x} | 
| 110 |     characters that contains at least one \b{x} but no more than | 
| 111 |     five. | 
| 112 |  | 
| 113 |     Note that in general regexps cannot be used to check for balanced | 
| 114 |     brackets or tags. For example, a regexp can be written to match an | 
| 115 |     opening html \c{<b>} and its closing \c{</b>}, if the \c{<b>} tags | 
| 116 |     are not nested, but if the \c{<b>} tags are nested, that same | 
| 117 |     regexp will match an opening \c{<b>} tag with the wrong closing | 
| 118 |     \c{</b>}.  For the fragment \c{<b>bold <b>bolder</b></b>}, the | 
| 119 |     first \c{<b>} would be matched with the first \c{</b>}, which is | 
| 120 |     not correct. However, it is possible to write a regexp that will | 
| 121 |     match nested brackets or tags correctly, but only if the number of | 
| 122 |     nesting levels is fixed and known. If the number of nesting levels | 
| 123 |     is not fixed and known, it is impossible to write a regexp that | 
| 124 |     will not fail. | 
| 125 |  | 
| 126 |     Suppose we want a regexp to match integers in the range 0 to 99. | 
| 127 |     At least one digit is required, so we start with the expression | 
| 128 |     \b{[0-9]{1,1}}, which matches a single digit exactly once. This | 
| 129 |     regexp matches integers in the range 0 to 9. To match integers up | 
| 130 |     to 99, increase the maximum number of occurrences to 2, so the | 
| 131 |     regexp becomes \b{[0-9]{1,2}}. This regexp satisfies the | 
| 132 |     original requirement to match integers from 0 to 99, but it will | 
| 133 |     also match integers that occur in the middle of strings. If we | 
| 134 |     want the matched integer to be the whole string, we must use the | 
| 135 |     anchor assertions, \b{^} (caret) and \b{$} (dollar). When | 
| 136 |     \b{^} is the first character in a regexp, it means the regexp | 
| 137 |     must match from the beginning of the string. When \b{$} is the | 
| 138 |     last character of the regexp, it means the regexp must match to | 
| 139 |     the end of the string. The regexp becomes \b{^[0-9]{1,2}$}. | 
| 140 |     Note that assertions, e.g. \b{^} and \b{$}, do not match | 
| 141 |     characters but locations in the string. | 
| 142 |  | 
| 143 |     If you have seen regexps described elsewhere, they may have looked | 
| 144 |     different from the ones shown here. This is because some sets of | 
| 145 |     characters and some quantifiers are so common that they have been | 
| 146 |     given special symbols to represent them. \b{[0-9]} can be | 
| 147 |     replaced with the symbol \b{\\d}. The quantifier to match | 
| 148 |     exactly one occurrence, \b{{1,1}}, can be replaced with the | 
| 149 |     expression itself, i.e. \b{x{1,1}} is the same as \b{x}. So | 
| 150 |     our 0 to 99 matcher could be written as \b{^\\d{1,2}$}. It can | 
| 151 |     also be written \b{^\\d\\d{0,1}$}, i.e. \e{From the start of | 
| 152 |     the string, match a digit, followed immediately by 0 or 1 digits}. | 
| 153 |     In practice, it would be written as \b{^\\d\\d?$}. The \b{?} | 
| 154 |     is shorthand for the quantifier \b{{0,1}}, i.e. 0 or 1 | 
| 155 |     occurrences. \b{?} makes an expression optional. The regexp | 
| 156 |     \b{^\\d\\d?$} means \e{From the beginning of the string, match | 
| 157 |     one digit, followed immediately by 0 or 1 more digit, followed | 
| 158 |     immediately by end of string}. | 
| 159 |  | 
| 160 |     To write a regexp that matches one of the words 'mail' \e or | 
| 161 |     'letter' \e or 'correspondence' but does not match words that | 
| 162 |     contain these words, e.g., 'email', 'mailman', 'mailer', and | 
| 163 |     'letterbox', start with a regexp that matches 'mail'. Expressed | 
| 164 |     fully, the regexp is \b{m{1,1}a{1,1}i{1,1}l{1,1}}, but because | 
| 165 |     a character expression is automatically quantified by | 
| 166 |     \b{{1,1}}, we can simplify the regexp to \b{mail}, i.e., an | 
| 167 |     'm' followed by an 'a' followed by an 'i' followed by an 'l'. Now | 
| 168 |     we can use the vertical bar \b{|}, which means \b{or}, to | 
| 169 |     include the other two words, so our regexp for matching any of the | 
| 170 |     three words becomes \b{mail|letter|correspondence}. Match | 
| 171 |     'mail' \b{or} 'letter' \b{or} 'correspondence'. While this | 
| 172 |     regexp will match one of the three words we want to match, it will | 
| 173 |     also match words we don't want to match, e.g., 'email'.  To | 
| 174 |     prevent the regexp from matching unwanted words, we must tell it | 
| 175 |     to begin and end the match at word boundaries. First we enclose | 
| 176 |     our regexp in parentheses, \b{(mail|letter|correspondence)}. | 
| 177 |     Parentheses group expressions together, and they identify a part | 
| 178 |     of the regexp that we wish to \l{capturing text}{capture}. | 
| 179 |     Enclosing the expression in parentheses allows us to use it as a | 
| 180 |     component in more complex regexps. It also allows us to examine | 
| 181 |     which of the three words was actually matched. To force the match | 
| 182 |     to begin and end on word boundaries, we enclose the regexp in | 
| 183 |     \b{\\b} \e{word boundary} assertions: | 
| 184 |     \b{\\b(mail|letter|correspondence)\\b}.  Now the regexp means: | 
| 185 |     \e{Match a word boundary, followed by the regexp in parentheses, | 
| 186 |     followed by a word boundary}. The \b{\\b} assertion matches a | 
| 187 |     \e position in the string, not a \e character. A word boundary lies | 
| 188 |     between a word character and a non-word character (e.g., a space or | 
| 189 |     newline) or the beginning or end of the string. | 
| 190 |  | 
| 191 |     If we want to replace ampersand characters with the HTML entity | 
| 192 |     \b{\&amp;}, the regexp to match is simply \b{\&}. But this | 
| 193 |     regexp will also match ampersands that have already been converted | 
| 194 |     to HTML entities. We want to replace only ampersands that are not | 
| 195 |     already followed by \b{amp;}. For this, we need the negative | 
| 196 |     lookahead assertion, \b{(?!}__\b{)}. The regexp can then be | 
| 197 |     written as \b{\&(?!amp;)}, i.e. \e{Match an ampersand that is} | 
| 198 |     \b{not} \e{followed by} \b{amp;}. | 
| 199 |  | 
| 200 |     If we want to count all the occurrences of 'Eric' and 'Eirik' in a | 
| 201 |     string, two valid solutions are \b{\\b(Eric|Eirik)\\b} and | 
| 202 |     \b{\\bEi?ri[ck]\\b}. The word boundary assertion '\\b' is | 
| 203 |     required to avoid matching words that contain either name, | 
| 204 |     e.g. 'Ericsson'. Note that the second regexp matches more | 
| 205 |     spellings than we want: 'Eric', 'Erik', 'Eiric' and 'Eirik'. | 
| 206 |  | 
| 207 |     Some of the examples discussed above are implemented in the | 
| 208 |     \l{#code-examples}{code examples} section. | 
| 209 |  | 
| 210 |     \target characters-and-abbreviations-for-sets-of-characters | 
| 211 |     \section1 Characters and Abbreviations for Sets of Characters | 
| 212 |  | 
| 213 |     \table | 
| 214 |     \header \li Element \li Meaning | 
| 215 |     \row \li \b{c} | 
| 216 |          \li A character represents itself unless it has a special | 
| 217 |          regexp meaning. e.g. \b{c} matches the character \e c. | 
| 218 |     \row \li \b{\\c} | 
| 219 |          \li A character that follows a backslash matches the character | 
| 220 |          itself, except as specified below. e.g., To match a literal | 
| 221 |          caret at the beginning of a string, write \b{\\^}. | 
| 222 |     \row \li \b{\\a} | 
| 223 |          \li Matches the ASCII bell (BEL, 0x07). | 
| 224 |     \row \li \b{\\f} | 
| 225 |          \li Matches the ASCII form feed (FF, 0x0C). | 
| 226 |     \row \li \b{\\n} | 
| 227 |          \li Matches the ASCII line feed (LF, 0x0A, Unix newline). | 
| 228 |     \row \li \b{\\r} | 
| 229 |          \li Matches the ASCII carriage return (CR, 0x0D). | 
| 230 |     \row \li \b{\\t} | 
| 231 |          \li Matches the ASCII horizontal tab (HT, 0x09). | 
| 232 |     \row \li \b{\\v} | 
| 233 |          \li Matches the ASCII vertical tab (VT, 0x0B). | 
| 234 |     \row \li \b{\\x\e{hhhh}} | 
| 235 |          \li Matches the Unicode character corresponding to the | 
| 236 |          hexadecimal number \e{hhhh} (between 0x0000 and 0xFFFF). | 
| 237 |     \row \li \b{\\0\e{ooo}} (i.e., \\zero \e{ooo}) | 
| 238 |          \li matches the ASCII/Latin1 character for the octal number | 
| 239 |          \e{ooo} (between 0 and 0377). | 
| 240 |     \row \li \b{. (dot)} | 
| 241 |          \li Matches any character (including newline). | 
| 242 |     \row \li \b{\\d} | 
| 243 |          \li Matches a digit (QChar::isDigit()). | 
| 244 |     \row \li \b{\\D} | 
| 245 |          \li Matches a non-digit. | 
| 246 |     \row \li \b{\\s} | 
| 247 |          \li Matches a whitespace character (QChar::isSpace()). | 
| 248 |     \row \li \b{\\S} | 
| 249 |          \li Matches a non-whitespace character. | 
| 250 |     \row \li \b{\\w} | 
| 251 |          \li Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_'). | 
| 252 |     \row \li \b{\\W} | 
| 253 |          \li Matches a non-word character. | 
| 254 |     \row \li \b{\\\e{n}} | 
| 255 |          \li The \e{n}-th backreference, e.g. \\1, \\2, etc. | 
| 256 |     \endtable | 
| 257 |  | 
| 258 |     \b{Note:} The C++ compiler transforms backslashes in strings. | 
| 259 |     To include a \b{\\} in a regexp, enter it twice, i.e. \c{\\}. | 
| 260 |     To match the backslash character itself, enter it four times, i.e. | 
| 261 |     \c{\\\\}. | 
| 262 |  | 
| 263 |     \target sets-of-characters | 
| 264 |     \section1 Sets of Characters | 
| 265 |  | 
| 266 |     Square brackets mean match any character contained in the square | 
| 267 |     brackets. The character set abbreviations described above can | 
| 268 |     appear in a character set in square brackets. Except for the | 
| 269 |     character set abbreviations and the following two exceptions, | 
| 270 |     characters do not have special meanings in square brackets. | 
| 271 |  | 
| 272 |     \table | 
| 273 |     \row \li \b{^} | 
| 274 |  | 
| 275 |          \li The caret negates the character set if it occurs as the | 
| 276 |          first character (i.e. immediately after the opening square | 
| 277 |          bracket). \b{[abc]} matches 'a' or 'b' or 'c', but | 
| 278 |          \b{[^abc]} matches anything \e but 'a' or 'b' or 'c'. | 
| 279 |  | 
| 280 |     \row \li \b{-} | 
| 281 |  | 
| 282 |          \li The dash indicates a range of characters. \b{[W-Z]} | 
| 283 |          matches 'W' or 'X' or 'Y' or 'Z'. | 
| 284 |  | 
| 285 |     \endtable | 
| 286 |  | 
| 287 |     Using the predefined character set abbreviations is more portable | 
| 288 |     than using character ranges across platforms and languages. For | 
| 289 |     example, \b{[0-9]} matches a digit in Western alphabets but | 
| 290 |     \b{\\d} matches a digit in \e any alphabet. | 
| 291 |  | 
| 292 |     Note: In other regexp documentation, sets of characters are often | 
| 293 |     called "character classes". | 
| 294 |  | 
| 295 |     \target quantifiers | 
| 296 |     \section1 Quantifiers | 
| 297 |  | 
| 298 |     By default, an expression is automatically quantified by | 
| 299 |     \b{{1,1}}, i.e. it should occur exactly once. In the following | 
| 300 |     list, \b{\e {E}} stands for expression. An expression is a | 
| 301 |     character, or an abbreviation for a set of characters, or a set of | 
| 302 |     characters in square brackets, or an expression in parentheses. | 
| 303 |  | 
| 304 |     \table | 
| 305 |     \row \li \b{\e {E}?} | 
| 306 |  | 
| 307 |          \li Matches zero or one occurrences of \e E. This quantifier | 
| 308 |          means \e{The previous expression is optional}, because it | 
| 309 |          will match whether or not the expression is found. \b{\e | 
| 310 |          {E}?} is the same as \b{\e {E}{0,1}}. e.g., \b{dents?} | 
| 311 |          matches 'dent' or 'dents'. | 
| 312 |  | 
| 313 |     \row \li \b{\e {E}+} | 
| 314 |  | 
| 315 |          \li Matches one or more occurrences of \e E. \b{\e {E}+} is | 
| 316 |          the same as \b{\e {E}{1,}}. e.g., \b{0+} matches '0', | 
| 317 |          '00', '000', etc. | 
| 318 |  | 
| 319 |     \row \li \b{\e {E}*} | 
| 320 |  | 
| 321 |          \li Matches zero or more occurrences of \e E. It is the same | 
| 322 |          as \b{\e {E}{0,}}. The \b{*} quantifier is often used | 
| 323 |          in error where \b{+} should be used. For example, if | 
| 324 |          \b{\\s*$} is used in an expression to match strings that | 
| 325 |          end in whitespace, it will match every string because | 
| 326 |          \b{\\s*$} means \e{Match zero or more whitespaces followed | 
| 327 |          by end of string}. The correct regexp to match strings that | 
| 328 |          have at least one trailing whitespace character is | 
| 329 |          \b{\\s+$}. | 
| 330 |  | 
| 331 |     \row \li \b{\e {E}{n}} | 
| 332 |  | 
| 333 |          \li Matches exactly \e n occurrences of \e E. \b{\e {E}{n}} | 
| 334 |          is the same as repeating \e E \e n times. For example, | 
| 335 |          \b{x{5}} is the same as \b{xxxxx}. It is also the same | 
| 336 |          as \b{\e {E}{n,n}}, e.g. \b{x{5,5}}. | 
| 337 |  | 
| 338 |     \row \li \b{\e {E}{n,}} | 
| 339 |          \li Matches at least \e n occurrences of \e E. | 
| 340 |  | 
| 341 |     \row \li \b{\e {E}{,m}} | 
| 342 |          \li Matches at most \e m occurrences of \e E. \b{\e {E}{,m}} | 
| 343 |          is the same as \b{\e {E}{0,m}}. | 
| 344 |  | 
| 345 |     \row \li \b{\e {E}{n,m}} | 
| 346 |          \li Matches at least \e n and at most \e m occurrences of \e E. | 
| 347 |     \endtable | 
| 348 |  | 
| 349 |     To apply a quantifier to more than just the preceding character, | 
| 350 |     use parentheses to group characters together in an expression. For | 
| 351 |     example, \b{tag+} matches a 't' followed by an 'a' followed by | 
| 352 |     at least one 'g', whereas \b{(tag)+} matches at least one | 
| 353 |     occurrence of 'tag'. | 
| 354 |  | 
| 355 |     Note: Quantifiers are normally "greedy". They always match as much | 
| 356 |     text as they can. For example, \b{0+} matches the first zero it | 
| 357 |     finds and all the consecutive zeros after the first zero. Applied | 
| 358 |     to '20005', it matches '2\underline{000}5'. Quantifiers can be made | 
| 359 |     non-greedy, see setMinimal(). | 
| 360 |  | 
| 361 |     \target capturing parentheses | 
| 362 |     \target backreferences | 
| 363 |     \section1 Capturing Text | 
| 364 |  | 
| 365 |     Parentheses allow us to group elements together so that we can | 
| 366 |     quantify and capture them. For example, if the expression | 
| 367 |     \b{mail|letter|correspondence} matches a string, we know | 
| 368 |     that \e one of the words matched, but not which one. Using | 
| 369 |     parentheses allows us to "capture" whatever is matched within | 
| 370 |     their bounds, so if we used \b{(mail|letter|correspondence)} | 
| 371 |     and matched this regexp against the string "I sent you some email" | 
| 372 |     we can use the cap() or capturedTexts() functions to extract the | 
| 373 |     matched characters, in this case 'mail'. | 
| 374 |  | 
| 375 |     We can use captured text within the regexp itself. To refer to the | 
| 376 |     captured text we use \e backreferences which are indexed from 1, | 
| 377 |     the same as for cap(). For example we could search for duplicate | 
| 378 |     words in a string using \b{\\b(\\w+)\\W+\\1\\b} which means match a | 
| 379 |     word boundary followed by one or more word characters followed by | 
| 380 |     one or more non-word characters followed by the same text as the | 
| 381 |     first parenthesized expression followed by a word boundary. | 
| 382 |  | 
| 383 |     If we want to use parentheses purely for grouping and not for | 
| 384 |     capturing we can use the non-capturing syntax, e.g. | 
| 385 |     \b{(?:green|blue)}. Non-capturing parentheses begin '(?:' and | 
| 386 |     end ')'. In this example we match either 'green' or 'blue' but we | 
| 387 |     do not capture the match so we only know whether or not we matched | 
| 388 |     but not which color we actually found. Using non-capturing | 
| 389 |     parentheses is more efficient than using capturing parentheses | 
| 390 |     since the regexp engine has to do less book-keeping. | 
| 391 |  | 
| 392 |     Both capturing and non-capturing parentheses may be nested. | 
| 393 |  | 
| 394 |     \target greedy quantifiers | 
| 395 |  | 
| 396 |     For historical reasons, quantifiers (e.g. \b{*}) that apply to | 
| 397 |     capturing parentheses are more "greedy" than other quantifiers. | 
| 398 |     For example, \b{a*(a*)} will match "aaa" with cap(1) == "aaa". | 
| 399 |     This behavior is different from what other regexp engines do | 
| 400 |     (notably, Perl). To obtain a more intuitive capturing behavior, | 
| 401 |     specify QRegExp::RegExp2 to the QRegExp constructor or call | 
| 402 |     setPatternSyntax(QRegExp::RegExp2). | 
| 403 |  | 
| 404 |     \target cap_in_a_loop | 
| 405 |  | 
| 406 |     When the number of matches cannot be determined in advance, a | 
| 407 |     common idiom is to use cap() in a loop. For example: | 
| 408 |  | 
| 409 |     \snippet code/src_corelib_text_qregexp.cpp 0 | 
| 410 |  | 
| 411 |     \target assertions | 
| 412 |     \section1 Assertions | 
| 413 |  | 
| 414 |     Assertions make some statement about the text at the point where | 
| 415 |     they occur in the regexp but they do not match any characters. In | 
| 416 |     the following list \b{\e {E}} stands for any expression. | 
| 417 |  | 
| 418 |     \table | 
| 419 |     \row \li \b{^} | 
| 420 |          \li The caret signifies the beginning of the string. If you | 
| 421 |          wish to match a literal \c{^} you must escape it by | 
| 422 |          writing \c{\\^}. For example, \b{^#include} will only | 
| 423 |          match strings which \e begin with the characters '#include'. | 
| 424 |          (When the caret is the first character of a character set it | 
| 425 |          has a special meaning, see \l{#sets-of-characters}{Sets of Characters}.) | 
| 426 |  | 
| 427 |     \row \li \b{$} | 
| 428 |          \li The dollar signifies the end of the string. For example | 
| 429 |          \b{\\d\\s*$} will match strings which end with a digit | 
| 430 |          optionally followed by whitespace. If you wish to match a | 
| 431 |          literal \c{$} you must escape it by writing | 
| 432 |          \c{\\$}. | 
| 433 |  | 
| 434 |     \row \li \b{\\b} | 
| 435 |          \li A word boundary. For example the regexp | 
| 436 |          \b{\\bOK\\b} means match immediately after a word | 
| 437 |          boundary (e.g. start of string or whitespace) the letter 'O' | 
| 438 |          then the letter 'K' immediately before another word boundary | 
| 439 |          (e.g. end of string or whitespace). But note that the | 
| 440 |          assertion does not actually match any whitespace so if we | 
| 441 |          write \b{(\\bOK\\b)} and we have a match it will only | 
| 442 |          contain 'OK' even if the string is "It's \underline{OK} now". | 
| 443 |  | 
| 444 |     \row \li \b{\\B} | 
| 445 |          \li A non-word boundary. This assertion is true wherever | 
| 446 |          \b{\\b} is false. For example if we searched for | 
| 447 |          \b{\\Bon\\B} in "Left on" the match would fail (space | 
| 448 |          and end of string aren't non-word boundaries), but it would | 
| 449 |          match in "t\underline{on}ne". | 
| 450 |  | 
| 451 |     \row \li \b{(?=\e E)} | 
| 452 |          \li Positive lookahead. This assertion is true if the | 
| 453 |          expression matches at this point in the regexp. For example, | 
| 454 |          \b{const(?=\\s+char)} matches 'const' whenever it is | 
| 455 |          followed by 'char', as in 'static \underline{const} char *'. | 
| 456 |          (Compare with \b{const\\s+char}, which matches 'static | 
| 457 |          \underline{const char} *'.) | 
| 458 |  | 
| 459 |     \row \li \b{(?!\e E)} | 
| 460 |          \li Negative lookahead. This assertion is true if the | 
| 461 |          expression does not match at this point in the regexp. For | 
| 462 |          example, \b{const(?!\\s+char)} matches 'const' \e except | 
| 463 |          when it is followed by 'char'. | 
| 464 |     \endtable | 
| 465 |  | 
| 466 |     \target QRegExp wildcard matching | 
| 467 |     \section1 Wildcard Matching | 
| 468 |  | 
| 469 |     Most command shells such as \e bash or \e cmd.exe support "file | 
| 470 |     globbing", the ability to identify a group of files by using | 
| 471 |     wildcards. The setPatternSyntax() function is used to switch | 
| 472 |     between regexp and wildcard mode. Wildcard matching is much | 
| 473 |     simpler than full regexps and has only four features: | 
| 474 |  | 
| 475 |     \table | 
| 476 |     \row \li \b{c} | 
| 477 |          \li Any character represents itself apart from those mentioned | 
| 478 |          below. Thus \b{c} matches the character \e c. | 
| 479 |     \row \li \b{?} | 
| 480 |          \li Matches any single character. It is the same as | 
| 481 |          \b{.} in full regexps. | 
| 482 |     \row \li \b{*} | 
| 483 |          \li Matches zero or more of any characters. It is the | 
| 484 |          same as \b{.*} in full regexps. | 
| 485 |     \row \li \b{[...]} | 
| 486 |          \li Sets of characters can be represented in square brackets, | 
| 487 |          similar to full regexps. Within the character class, like | 
| 488 |          outside, backslash has no special meaning. | 
| 489 |     \endtable | 
| 490 |  | 
| 491 |     In the mode Wildcard, the wildcard characters cannot be | 
| 492 |     escaped. In the mode WildcardUnix, the character '\\' escapes the | 
| 493 |     wildcard. | 
| 494 |  | 
| 495 |     For example if we are in wildcard mode and have strings which | 
| 496 |     contain filenames we could identify HTML files with \b{*.html}. | 
| 497 |     This will match zero or more characters followed by a dot followed | 
| 498 |     by 'h', 't', 'm' and 'l'. | 
| 499 |  | 
| 500 |     To test a string against a wildcard expression, use exactMatch(). | 
| 501 |     For example: | 
| 502 |  | 
| 503 |     \snippet code/src_corelib_text_qregexp.cpp 1 | 
| 504 |  | 
| 505 |     \target perl-users | 
| 506 |     \section1 Notes for Perl Users | 
| 507 |  | 
| 508 |     Most of the character class abbreviations supported by Perl are | 
| 509 |     supported by QRegExp, see \l{#characters-and-abbreviations-for-sets-of-characters} | 
| 510 |     {characters and abbreviations for sets of characters}. | 
| 511 |  | 
| 512 |     In QRegExp, apart from within character classes, \c{^} always | 
| 513 |     signifies the start of the string, so carets must always be | 
| 514 |     escaped unless used for that purpose. In Perl the meaning of caret | 
| 515 |     varies automagically depending on where it occurs so escaping it | 
| 516 |     is rarely necessary. The same applies to \c{$} which in | 
| 517 |     QRegExp always signifies the end of the string. | 
| 518 |  | 
| 519 |     QRegExp's quantifiers are the same as Perl's greedy quantifiers | 
| 520 |     (but see the \l{greedy quantifiers}{note above}). Non-greedy | 
| 521 |     matching cannot be applied to individual quantifiers, but can be | 
| 522 |     applied to all the quantifiers in the pattern. For example, to | 
| 523 |     match the Perl regexp \b{ro+?m} requires: | 
| 524 |  | 
| 525 |     \snippet code/src_corelib_text_qregexp.cpp 2 | 
| 526 |  | 
| 527 |     The equivalent of Perl's \c{/i} option is | 
| 528 |     setCaseSensitivity(Qt::CaseInsensitive). | 
| 529 |  | 
| 530 |     Perl's \c{/g} option can be emulated using a \l{#cap_in_a_loop}{loop}. | 
| 531 |  | 
| 532 |     In QRegExp \b{.} matches any character, therefore all QRegExp | 
| 533 |     regexps have the equivalent of Perl's \c{/s} option. QRegExp | 
| 534 |     does not have an equivalent to Perl's \c{/m} option, but this | 
| 535 |     can be emulated in various ways for example by splitting the input | 
| 536 |     into lines or by looping with a regexp that searches for newlines. | 
| 537 |  | 
| 538 |     Because QRegExp is string oriented, there are no \\A, \\Z, or \\z | 
| 539 |     assertions. The \\G assertion is not supported but can be emulated | 
| 540 |     in a loop. | 
| 541 |  | 
| 542 |     Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp | 
| 543 |     equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, | 
| 544 |     ... correspond to cap(1) or capturedTexts()[1], cap(2) or | 
| 545 |     capturedTexts()[2], etc. | 
| 546 |  | 
| 547 |     To substitute a pattern use QString::replace(). | 
| 548 |  | 
| 549 |     Perl's extended \c{/x} syntax is not supported, nor are | 
| 550 |     directives, e.g. (?i), or regexp comments, e.g. (?#comment). On | 
| 551 |     the other hand, C++'s rules for literal strings can be used to | 
| 552 |     achieve the same: | 
| 553 |  | 
| 554 |     \snippet code/src_corelib_text_qregexp.cpp 3 | 
| 555 |  | 
| 556 |     Both zero-width positive and zero-width negative lookahead | 
| 557 |     assertions (?=pattern) and (?!pattern) are supported with the same | 
| 558 |     syntax as Perl. Perl's lookbehind assertions, "independent" | 
| 559 |     subexpressions and conditional expressions are not supported. | 
| 560 |  | 
| 561 |     Non-capturing parentheses are also supported, with the same | 
| 562 |     (?:pattern) syntax. | 
| 563 |  | 
| 564 |     See QString::split() and QStringList::join() for equivalents | 
| 565 |     to Perl's split and join functions. | 
| 566 |  | 
| 567 |     Note: because C++ transforms \\'s they must be written \e twice in | 
| 568 |     code, e.g. \b{\\b} must be written \b{\\\\b}. | 
| 569 |  | 
| 570 |     \target code-examples | 
| 571 |     \section1 Code Examples | 
| 572 |  | 
| 573 |     \snippet code/src_corelib_text_qregexp.cpp 4 | 
| 574 |  | 
| 575 |     The third string matches '\underline{6}'. This is a simple validation | 
| 576 |     regexp for integers in the range 0 to 99. | 
| 577 |  | 
| 578 |     \snippet code/src_corelib_text_qregexp.cpp 5 | 
| 579 |  | 
| 580 |     The second string matches '\underline{This_is-OK}'. We've used the | 
| 581 |     character set abbreviation '\\S' (non-whitespace) and the anchors | 
| 582 |     to match strings which contain no whitespace. | 
| 583 |  | 
| 584 |     In the following example we match strings containing 'mail' or | 
| 585 |     'letter' or 'correspondence' but only match whole words i.e. not | 
| 586 |     'email'. | 
| 587 |  | 
| 588 |     \snippet code/src_corelib_text_qregexp.cpp 6 | 
| 589 |  | 
| 590 |     The second string matches "Please write the \underline{letter}". The | 
| 591 |     word 'letter' is also captured (because of the parentheses). We | 
| 592 |     can see what text we've captured like this: | 
| 593 |  | 
| 594 |     \snippet code/src_corelib_text_qregexp.cpp 7 | 
| 595 |  | 
| 596 |     This will capture the text from the first set of capturing | 
| 597 |     parentheses (counting capturing left parentheses from left to | 
| 598 |     right). The parentheses are counted from 1 since cap(0) is the | 
| 599 |     whole matched regexp (equivalent to '&' in most regexp engines). | 
| 600 |  | 
| 601 |     \snippet code/src_corelib_text_qregexp.cpp 8 | 
| 602 |  | 
| 603 |     Here we've passed the QRegExp to QString's replace() function to | 
| 604 |     replace the matched text with new text. | 
| 605 |  | 
| 606 |     \snippet code/src_corelib_text_qregexp.cpp 9 | 
| 607 |  | 
| 608 |     We've used the indexIn() function to repeatedly match the regexp in | 
| 609 |     the string. Note that instead of moving forward by one character | 
| 610 |     at a time \c pos++ we could have written \c {pos += | 
| 611 |     rx.matchedLength()} to skip over the already matched string. The | 
| 612 |     count will equal 3, matching 'One \underline{Eric} another | 
| 613 |     \underline{Eirik}, and an Ericsson. How many Eiriks, \underline{Eric}?'; it | 
| 614 |     doesn't match 'Ericsson' or 'Eiriks' because they are not delimited | 
| 615 |     by word boundaries on both sides. | 
| 616 |  | 
| 617 |     One common use of regexps is to split lines of delimited data into | 
| 618 |     their component fields. | 
| 619 |  | 
| 620 |     \snippet code/src_corelib_text_qregexp.cpp 10 | 
| 621 |  | 
| 622 |     In this example our input lines have the format company name, web | 
| 623 |     address and country. Unfortunately the regexp is rather long and | 
| 624 |     not very versatile -- the code will break if we add any more | 
| 625 |     fields. A simpler and better solution is to look for the | 
| 626 |     separator, '\\t' in this case, and take the surrounding text. The | 
| 627 |     QString::split() function can take a separator string or regexp | 
| 628 |     as an argument and split a string accordingly. | 
| 629 |  | 
| 630 |     \snippet code/src_corelib_text_qregexp.cpp 11 | 
| 631 |  | 
| 632 |     Here field[0] is the company, field[1] the web address and so on. | 
| 633 |  | 
| 634 |     To imitate the matching of a shell we can use wildcard mode. | 
| 635 |  | 
| 636 |     \snippet code/src_corelib_text_qregexp.cpp 12 | 
| 637 |  | 
| 638 |     Wildcard matching can be convenient because of its simplicity, but | 
| 639 |     any wildcard regexp can be defined using full regexps, e.g. | 
| 640 |     \b{.*\\.html$}. Notice that we can't match both \c .html and \c | 
| 641 |     .htm files with a wildcard unless we use \b{*.htm*} which will | 
| 642 |     also match 'test.html.bak'. A full regexp gives us the precision | 
| 643 |     we need, \b{.*\\.html?$}. | 
| 644 |  | 
| 645 |     QRegExp can match case insensitively using setCaseSensitivity(), | 
| 646 |     and can use non-greedy matching, see setMinimal(). By | 
| 647 |     default QRegExp uses full regexps but this can be changed with | 
| 648 |     setPatternSyntax(). Searching can be done forward with indexIn() or backward | 
| 649 |     with lastIndexIn(). Captured text can be accessed using | 
| 650 |     capturedTexts() which returns a string list of all captured | 
| 651 |     strings, or using cap() which returns the captured string for the | 
| 652 |     given index. The pos() function takes a match index and returns | 
| 653 |     the position in the string where the match was made (or -1 if | 
| 654 |     there was no match). | 
| 655 |  | 
| 656 |     \sa QString, QStringList, QSortFilterProxyModel | 
| 657 |  | 
| 658 |     \section1 Porting to QRegularExpression | 
| 659 |  | 
| 660 |     \include corelib/port-from-qregexp.qdocinc porting-to-qregularexpression | 
| 661 | */ | 
| 662 |  | 
#if defined(Q_OS_VXWORKS) && defined(EOS)
#  undef EOS // an EOS macro would clash with the EOS constant defined below
#endif

// number of buckets that BadChar() maps characters into
constexpr int NumBadChars = 64;
// maps a character to a bucket index in the range [0, NumBadChars)
#define BadChar(ch) ((ch).unicode() % NumBadChars)

// Sentinel values used throughout the engine.
// NOTE(review): the individual meanings are inferred from the names;
// confirm at the use sites.
constexpr int NoOccurrence = INT_MAX;
constexpr int EmptyCapture = INT_MAX;
constexpr int InftyLen = INT_MAX;
constexpr int InftyRep = 1025;
constexpr int EOS = -1;
| 675 |  | 
| 676 | static bool isWord(QChar ch) | 
| 677 | { | 
| 678 |     return ch.isLetterOrNumber() || ch.isMark() || ch == QLatin1Char('_'); | 
| 679 | } | 
| 680 |  | 
| 681 | /* | 
| 682 |   Merges two vectors of ints and puts the result into the first | 
| 683 |   one. | 
| 684 | */ | 
| 685 | static void mergeInto(QList<int> *a, const QList<int> &b) | 
| 686 | { | 
| 687 |     int asize = a->size(); | 
| 688 |     int bsize = b.size(); | 
| 689 |     if (asize == 0) { | 
| 690 |         *a = b; | 
| 691 | #ifndef QT_NO_REGEXP_OPTIM | 
| 692 |     } else if (bsize == 1 && a->at(i: asize - 1) < b.at(i: 0)) { | 
| 693 |         a->resize(size: asize + 1); | 
| 694 |         (*a)[asize] = b.at(i: 0); | 
| 695 | #endif | 
| 696 |     } else if (bsize >= 1) { | 
| 697 |         int csize = asize + bsize; | 
| 698 |         QList<int> c(csize); | 
| 699 |         int i = 0, j = 0, k = 0; | 
| 700 |         while (i < asize) { | 
| 701 |             if (j < bsize) { | 
| 702 |                 if (a->at(i) == b.at(i: j)) { | 
| 703 |                     ++i; | 
| 704 |                     --csize; | 
| 705 |                 } else if (a->at(i) < b.at(i: j)) { | 
| 706 |                     c[k++] = a->at(i: i++); | 
| 707 |                 } else { | 
| 708 |                     c[k++] = b.at(i: j++); | 
| 709 |                 } | 
| 710 |             } else { | 
| 711 |                 memcpy(dest: c.data() + k, src: a->constData() + i, n: (asize - i) * sizeof(int)); | 
| 712 |                 break; | 
| 713 |             } | 
| 714 |         } | 
| 715 |         c.resize(size: csize); | 
| 716 |         if (j < bsize) | 
| 717 |             memcpy(dest: c.data() + k, src: b.constData() + j, n: (bsize - j) * sizeof(int)); | 
| 718 |         *a = c; | 
| 719 |     } | 
| 720 | } | 
| 721 |  | 
| 722 | #ifndef QT_NO_REGEXP_WILDCARD | 
| 723 | /* | 
| 724 |   Translates a wildcard pattern to an equivalent regular expression | 
| 725 |   pattern (e.g., *.cpp to .*\.cpp). | 
| 726 |  | 
| 727 |   If enableEscaping is true, it is possible to escape the wildcard | 
| 728 |   characters with \ | 
| 729 | */ | 
| 730 | static QString wc2rx(const QString &wc_str, const bool enableEscaping) | 
| 731 | { | 
| 732 |     const int wclen = wc_str.size(); | 
| 733 |     QString rx; | 
| 734 |     int i = 0; | 
| 735 |     bool isEscaping = false; // the previous character is '\' | 
| 736 |     const QChar *wc = wc_str.unicode(); | 
| 737 |  | 
| 738 |     while (i < wclen) { | 
| 739 |         const QChar c = wc[i++]; | 
| 740 |         switch (c.unicode()) { | 
| 741 |         case '\\': | 
| 742 |             if (enableEscaping) { | 
| 743 |                 if (isEscaping) { | 
| 744 |                     rx += QLatin1String("\\\\" ); | 
| 745 |                 } // we insert the \\ later if necessary | 
| 746 |                 if (i == wclen) { // the end | 
| 747 |                     rx += QLatin1String("\\\\" ); | 
| 748 |                 } | 
| 749 |             } else { | 
| 750 |                 rx += QLatin1String("\\\\" ); | 
| 751 |             } | 
| 752 |             isEscaping = true; | 
| 753 |             break; | 
| 754 |         case '*': | 
| 755 |             if (isEscaping) { | 
| 756 |                 rx += QLatin1String("\\*" ); | 
| 757 |                 isEscaping = false; | 
| 758 |             } else { | 
| 759 |                 rx += QLatin1String(".*" ); | 
| 760 |             } | 
| 761 |             break; | 
| 762 |         case '?': | 
| 763 |             if (isEscaping) { | 
| 764 |                 rx += QLatin1String("\\?" ); | 
| 765 |                 isEscaping = false; | 
| 766 |             } else { | 
| 767 |                 rx += QLatin1Char('.'); | 
| 768 |             } | 
| 769 |  | 
| 770 |             break; | 
| 771 |         case '$': | 
| 772 |         case '(': | 
| 773 |         case ')': | 
| 774 |         case '+': | 
| 775 |         case '.': | 
| 776 |         case '^': | 
| 777 |         case '{': | 
| 778 |         case '|': | 
| 779 |         case '}': | 
| 780 |             if (isEscaping) { | 
| 781 |                 isEscaping = false; | 
| 782 |                 rx += QLatin1String("\\\\" ); | 
| 783 |             } | 
| 784 |             rx += QLatin1Char('\\'); | 
| 785 |             rx += c; | 
| 786 |             break; | 
| 787 |          case '[': | 
| 788 |             if (isEscaping) { | 
| 789 |                 isEscaping = false; | 
| 790 |                 rx += QLatin1String("\\[" ); | 
| 791 |             } else { | 
| 792 |                 rx += c; | 
| 793 |                 if (wc[i] == QLatin1Char('^')) | 
| 794 |                     rx += wc[i++]; | 
| 795 |                 if (i < wclen) { | 
| 796 |                     if (wc[i] == QLatin1Char(']')) | 
| 797 |                         rx += wc[i++]; | 
| 798 |                     while (i < wclen && wc[i] != QLatin1Char(']')) { | 
| 799 |                         if (wc[i] == QLatin1Char('\\')) | 
| 800 |                             rx += QLatin1Char('\\'); | 
| 801 |                         rx += wc[i++]; | 
| 802 |                     } | 
| 803 |                 } | 
| 804 |             } | 
| 805 |              break; | 
| 806 |  | 
| 807 |         case ']': | 
| 808 |             if (isEscaping){ | 
| 809 |                 isEscaping = false; | 
| 810 |                 rx += QLatin1String("\\" ); | 
| 811 |             } | 
| 812 |             rx += c; | 
| 813 |             break; | 
| 814 |  | 
| 815 |         default: | 
| 816 |             if (isEscaping){ | 
| 817 |                 isEscaping = false; | 
| 818 |                 rx += QLatin1String("\\\\" ); | 
| 819 |             } | 
| 820 |             rx += c; | 
| 821 |         } | 
| 822 |     } | 
| 823 |     return rx; | 
| 824 | } | 
| 825 | #endif | 
| 826 |  | 
| 827 | static int caretIndex(int offset, QRegExp::CaretMode caretMode) | 
| 828 | { | 
| 829 |     if (caretMode == QRegExp::CaretAtZero) { | 
| 830 |         return 0; | 
| 831 |     } else if (caretMode == QRegExp::CaretAtOffset) { | 
| 832 |         return offset; | 
| 833 |     } else { // QRegExp::CaretWontMatch | 
| 834 |         return -1; | 
| 835 |     } | 
| 836 | } | 
| 837 |  | 
| 838 | /* | 
| 839 |     The QRegExpEngineKey struct uniquely identifies an engine. | 
| 840 | */ | 
| 841 | struct QRegExpEngineKey | 
| 842 | { | 
| 843 |     QString pattern; | 
| 844 |     QRegExp::PatternSyntax patternSyntax; | 
| 845 |     Qt::CaseSensitivity cs; | 
| 846 |  | 
| 847 |     inline QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax, | 
| 848 |                             Qt::CaseSensitivity cs) | 
| 849 |         : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {} | 
| 850 |  | 
| 851 |     inline void clear() { | 
| 852 |         pattern.clear(); | 
| 853 |         patternSyntax = QRegExp::RegExp; | 
| 854 |         cs = Qt::CaseSensitive; | 
| 855 |     } | 
| 856 | }; | 
| 857 |  | 
| 858 | static bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2) | 
| 859 | { | 
| 860 |     return key1.pattern == key2.pattern && key1.patternSyntax == key2.patternSyntax | 
| 861 |            && key1.cs == key2.cs; | 
| 862 | } | 
| 863 |  | 
| 864 | static size_t qHash(const QRegExpEngineKey &key, size_t seed = 0) noexcept | 
| 865 | { | 
| 866 |     return qHashMulti(seed, args: key.pattern, args: key.patternSyntax, args: key.cs); | 
| 867 | } | 
| 868 |  | 
| 869 | class QRegExpEngine; | 
| 870 |  | 
| 871 | /* | 
| 872 |   This is the engine state during matching. | 
| 873 | */ | 
| 874 | struct QRegExpMatchState | 
| 875 | { | 
| 876 |     const QChar *in; // a pointer to the input string data | 
| 877 |     int pos; // the current position in the string | 
| 878 |     int caretPos; | 
| 879 |     int len; // the length of the input string | 
| 880 |     bool minimal; // minimal matching? | 
| 881 |     int *bigArray; // big array holding the data for the next pointers | 
| 882 |     int *inNextStack; // is state is nextStack? | 
| 883 |     int *curStack; // stack of current states | 
| 884 |     int *nextStack; // stack of next states | 
| 885 |     int *curCapBegin; // start of current states' captures | 
| 886 |     int *nextCapBegin; // start of next states' captures | 
| 887 |     int *curCapEnd; // end of current states' captures | 
| 888 |     int *nextCapEnd; // end of next states' captures | 
| 889 |     int *tempCapBegin; // start of temporary captures | 
| 890 |     int *tempCapEnd; // end of temporary captures | 
| 891 |     int *capBegin; // start of captures for a next state | 
| 892 |     int *capEnd; // end of captures for a next state | 
| 893 |     int *slideTab; // bump-along slide table for bad-character heuristic | 
| 894 |     int *captured; // what match() returned last | 
| 895 |     int slideTabSize; // size of slide table | 
| 896 |     int capturedSize; | 
| 897 | #ifndef QT_NO_REGEXP_BACKREF | 
| 898 |     QList<QList<int>> sleeping; // list of back-reference sleepers | 
| 899 | #endif | 
| 900 |     int matchLen; // length of match | 
| 901 |     int oneTestMatchedLen; // length of partial match | 
| 902 |  | 
| 903 |     const QRegExpEngine *eng; | 
| 904 |  | 
| 905 |     inline QRegExpMatchState() : bigArray(nullptr), captured(nullptr) {} | 
| 906 |     inline ~QRegExpMatchState() { free(ptr: bigArray); } | 
| 907 |  | 
| 908 |     void drain() { free(ptr: bigArray); bigArray = nullptr; captured = nullptr; } // to save memory | 
| 909 |     void prepareForMatch(QRegExpEngine *eng); | 
| 910 |     void match(const QChar *str, int len, int pos, bool minimal, | 
| 911 |         bool oneTest, int caretIndex); | 
| 912 |     bool matchHere(); | 
| 913 |     bool testAnchor(int i, int a, const int *capBegin); | 
| 914 | }; | 
| 915 |  | 
| 916 | /* | 
| 917 |   The struct QRegExpAutomatonState represents one state in a modified NFA. The | 
| 918 |   input characters matched are stored in the state instead of on | 
| 919 |   the transitions, something possible for an automaton | 
| 920 |   constructed from a regular expression. | 
| 921 | */ | 
| 922 | struct QRegExpAutomatonState | 
| 923 | { | 
| 924 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 925 |     int atom; // which atom does this state belong to? | 
| 926 | #endif | 
| 927 |     int match; // what does it match? (see CharClassBit and BackRefBit) | 
| 928 |     QList<int> outs; // out-transitions | 
| 929 |     QMap<int, int> reenter; // atoms reentered when transiting out | 
| 930 |     QMap<int, int> anchors; // anchors met when transiting out | 
| 931 |  | 
| 932 |     inline QRegExpAutomatonState() { } | 
| 933 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 934 |     inline QRegExpAutomatonState(int a, int m) | 
| 935 |         : atom(a), match(m) { } | 
| 936 | #else | 
| 937 |     inline QRegExpAutomatonState(int m) | 
| 938 |         : match(m) { } | 
| 939 | #endif | 
| 940 | }; | 
| 941 |  | 
| 942 | Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_RELOCATABLE_TYPE); | 
| 943 |  | 
| 944 | /* | 
| 945 |   The struct QRegExpCharClassRange represents a range of characters (e.g., | 
| 946 |   [0-9] denotes range 48 to 57). | 
| 947 | */ | 
| 948 | struct QRegExpCharClassRange | 
| 949 | { | 
| 950 |     ushort from; // 48 | 
| 951 |     ushort len; // 10 | 
| 952 | }; | 
| 953 |  | 
| 954 | Q_DECLARE_TYPEINFO(QRegExpCharClassRange, Q_PRIMITIVE_TYPE); | 
| 955 |  | 
| 956 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 957 | /* | 
| 958 |   The struct QRegExpAtom represents one node in the hierarchy of regular | 
| 959 |   expression atoms. | 
| 960 | */ | 
| 961 | struct QRegExpAtom | 
| 962 | { | 
| 963 |     enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 }; | 
| 964 |  | 
| 965 |     int parent; // index of parent in array of atoms | 
| 966 |     int capture; // index of capture, from 1 to ncap - 1 | 
| 967 | }; | 
| 968 |  | 
| 969 | Q_DECLARE_TYPEINFO(QRegExpAtom, Q_PRIMITIVE_TYPE); | 
| 970 | #endif | 
| 971 |  | 
| 972 | struct QRegExpLookahead; | 
| 973 |  | 
| 974 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 975 | /* | 
| 976 |   The struct QRegExpAnchorAlternation represents a pair of anchors with | 
| 977 |   OR semantics. | 
| 978 | */ | 
| 979 | struct QRegExpAnchorAlternation | 
| 980 | { | 
| 981 |     int a; // this anchor... | 
| 982 |     int b; // ...or this one | 
| 983 | }; | 
| 984 |  | 
| 985 | Q_DECLARE_TYPEINFO(QRegExpAnchorAlternation, Q_PRIMITIVE_TYPE); | 
| 986 | #endif | 
| 987 |  | 
| 988 | #ifndef QT_NO_REGEXP_CCLASS | 
| 989 |  | 
| 990 | #define FLAG(x) (1 << (x)) | 
| 991 | /* | 
| 992 |   The class QRegExpCharClass represents a set of characters, such as can | 
| 993 |   be found in regular expressions (e.g., [a-z] denotes the set | 
| 994 |   {a, b, ..., z}). | 
| 995 | */ | 
| 996 | class QRegExpCharClass | 
| 997 | { | 
| 998 | public: | 
| 999 |     QRegExpCharClass(); | 
| 1000 |  | 
| 1001 |     void clear(); | 
| 1002 |     bool negative() const { return n; } | 
| 1003 |     void setNegative(bool negative); | 
| 1004 |     void addCategories(uint cats); | 
| 1005 |     void addRange(ushort from, ushort to); | 
| 1006 |     void addSingleton(ushort ch) { addRange(from: ch, to: ch); } | 
| 1007 |  | 
| 1008 |     bool in(QChar ch) const; | 
| 1009 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1010 |     const QList<int> &firstOccurrence() const { return occ1; } | 
| 1011 | #endif | 
| 1012 |  | 
| 1013 | #if defined(QT_DEBUG) | 
| 1014 |     void dump() const; | 
| 1015 | #endif | 
| 1016 |  | 
| 1017 | private: | 
| 1018 |     QList<QRegExpCharClassRange> r; // character ranges | 
| 1019 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1020 |     QList<int> occ1; // first-occurrence array | 
| 1021 | #endif | 
| 1022 |     uint c; // character classes | 
| 1023 |     bool n; // negative? | 
| 1024 | }; | 
| 1025 | #else | 
| 1026 | struct QRegExpCharClass | 
| 1027 | { | 
| 1028 |     int dummy; | 
| 1029 |  | 
| 1030 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1031 |     QRegExpCharClass() { occ1.fill(0, NumBadChars); } | 
| 1032 |  | 
| 1033 |     const QList<int> &firstOccurrence() const { return occ1; } | 
| 1034 |     QList<int> occ1; | 
| 1035 | #endif | 
| 1036 | }; | 
| 1037 | #endif | 
| 1038 |  | 
| 1039 | Q_DECLARE_TYPEINFO(QRegExpCharClass, Q_RELOCATABLE_TYPE); | 
| 1040 |  | 
| 1041 | /* | 
| 1042 |   The QRegExpEngine class encapsulates a modified nondeterministic | 
| 1043 |   finite automaton (NFA). | 
| 1044 | */ | 
| 1045 | class QRegExpEngine | 
| 1046 | { | 
| 1047 |     Q_DISABLE_COPY_MOVE(QRegExpEngine) | 
| 1048 | public: | 
| 1049 |     QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers) | 
| 1050 |         : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); } | 
| 1051 |  | 
| 1052 |     QRegExpEngine(const QRegExpEngineKey &key); | 
| 1053 |     ~QRegExpEngine(); | 
| 1054 |  | 
| 1055 |     bool isValid() const { return valid; } | 
| 1056 |     const QString &errorString() const { return yyError; } | 
| 1057 |     int captureCount() const { return officialncap; } | 
| 1058 |  | 
| 1059 |     int createState(QChar ch); | 
| 1060 |     int createState(const QRegExpCharClass &cc); | 
| 1061 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1062 |     int createState(int bref); | 
| 1063 | #endif | 
| 1064 |  | 
| 1065 |     void addCatTransitions(const QList<int> &from, const QList<int> &to); | 
| 1066 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1067 |     void addPlusTransitions(const QList<int> &from, const QList<int> &to, int atom); | 
| 1068 | #endif | 
| 1069 |  | 
| 1070 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 1071 |     int anchorAlternation(int a, int b); | 
| 1072 |     int anchorConcatenation(int a, int b); | 
| 1073 | #else | 
| 1074 |     int anchorAlternation(int a, int b) { return a & b; } | 
| 1075 |     int anchorConcatenation(int a, int b) { return a | b; } | 
| 1076 | #endif | 
| 1077 |     void addAnchors(int from, int to, int a); | 
| 1078 |  | 
| 1079 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1080 |     void heuristicallyChooseHeuristic(); | 
| 1081 | #endif | 
| 1082 |  | 
| 1083 | #if defined(QT_DEBUG) | 
| 1084 |     void dump() const; | 
| 1085 | #endif | 
| 1086 |  | 
| 1087 |     QAtomicInt ref; | 
| 1088 |  | 
| 1089 | private: | 
| 1090 |     enum { CharClassBit = 0x10000, BackRefBit = 0x20000 }; | 
| 1091 |     enum { InitialState = 0, FinalState = 1 }; | 
| 1092 |  | 
| 1093 |     void setup(); | 
| 1094 |     int setupState(int match); | 
| 1095 |  | 
| 1096 |     /* | 
| 1097 |       Let's hope that 13 lookaheads and 14 back-references are | 
| 1098 |       enough. | 
| 1099 |      */ | 
| 1100 |     enum { MaxLookaheads = 13, MaxBackRefs = 14 }; | 
| 1101 |     enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, | 
| 1102 |            Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010, | 
| 1103 |            Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, | 
| 1104 |            Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, | 
| 1105 |            Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs, | 
| 1106 |  | 
| 1107 |            Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^ | 
| 1108 |                    ((Anchor_FirstLookahead << MaxLookaheads) - 1) }; | 
| 1109 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1110 |     int startAtom(bool officialCapture); | 
| 1111 |     void finishAtom(int atom, bool needCapture); | 
| 1112 | #endif | 
| 1113 |  | 
| 1114 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1115 |     int addLookahead(QRegExpEngine *eng, bool negative); | 
| 1116 | #endif | 
| 1117 |  | 
| 1118 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1119 |     bool goodStringMatch(QRegExpMatchState &matchState) const; | 
| 1120 |     bool badCharMatch(QRegExpMatchState &matchState) const; | 
| 1121 | #else | 
| 1122 |     bool bruteMatch(QRegExpMatchState &matchState) const; | 
| 1123 | #endif | 
| 1124 |  | 
| 1125 |     QList<QRegExpAutomatonState> s; // array of states | 
| 1126 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1127 |     QList<QRegExpAtom> f; // atom hierarchy | 
| 1128 |     int nf; // number of atoms | 
| 1129 |     int cf; // current atom | 
| 1130 |     QList<int> captureForOfficialCapture; | 
| 1131 | #endif | 
| 1132 |     int officialncap; // number of captures, seen from the outside | 
| 1133 |     int ncap; // number of captures, seen from the inside | 
| 1134 | #ifndef QT_NO_REGEXP_CCLASS | 
| 1135 |     QList<QRegExpCharClass> cl; // array of character classes | 
| 1136 | #endif | 
| 1137 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1138 |     QList<QRegExpLookahead *> ahead; // array of lookaheads | 
| 1139 | #endif | 
| 1140 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 1141 |     QList<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors | 
| 1142 | #endif | 
| 1143 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1144 |     bool caretAnchored; // does the regexp start with ^? | 
| 1145 |     bool trivial; // is the good-string all that needs to match? | 
| 1146 | #endif | 
| 1147 |     bool valid; // is the regular expression valid? | 
| 1148 |     Qt::CaseSensitivity cs; // case sensitive? | 
| 1149 |     bool greedyQuantifiers; // RegExp2? | 
| 1150 |     bool xmlSchemaExtensions; | 
| 1151 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1152 |     int nbrefs; // number of back-references | 
| 1153 | #endif | 
| 1154 |  | 
| 1155 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1156 |     bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch | 
| 1157 |  | 
| 1158 |     int goodEarlyStart; // the index where goodStr can first occur in a match | 
| 1159 |     int goodLateStart; // the index where goodStr can last occur in a match | 
| 1160 |     QString goodStr; // the string that any match has to contain | 
| 1161 |  | 
| 1162 |     int minl; // the minimum length of a match | 
| 1163 |     QList<int> occ1; // first-occurrence array | 
| 1164 | #endif | 
| 1165 |  | 
| 1166 |     /* | 
| 1167 |       The class Box is an abstraction for a regular expression | 
| 1168 |       fragment. It can also be seen as one node in the syntax tree of | 
| 1169 |       a regular expression with synthetized attributes. | 
| 1170 |  | 
| 1171 |       Its interface is ugly for performance reasons. | 
| 1172 |     */ | 
| 1173 |     class Box | 
| 1174 |     { | 
| 1175 |     public: | 
| 1176 |         Box(QRegExpEngine *engine); | 
| 1177 |         Box(const Box &b) { operator=(b); } | 
| 1178 |  | 
| 1179 |         Box &operator=(const Box &b); | 
| 1180 |  | 
| 1181 |         void clear() { operator=(b: Box(eng)); } | 
| 1182 |         void set(QChar ch); | 
| 1183 |         void set(const QRegExpCharClass &cc); | 
| 1184 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1185 |         void set(int bref); | 
| 1186 | #endif | 
| 1187 |  | 
| 1188 |         void cat(const Box &b); | 
| 1189 |         void orx(const Box &b); | 
| 1190 |         void plus(int atom); | 
| 1191 |         void opt(); | 
| 1192 |         void catAnchor(int a); | 
| 1193 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1194 |         void setupHeuristics(); | 
| 1195 | #endif | 
| 1196 |  | 
| 1197 | #if defined(QT_DEBUG) | 
| 1198 |         void dump() const; | 
| 1199 | #endif | 
| 1200 |  | 
| 1201 |     private: | 
| 1202 |         void addAnchorsToEngine(const Box &to) const; | 
| 1203 |  | 
| 1204 |         QRegExpEngine *eng; // the automaton under construction | 
| 1205 |         QList<int> ls; // the left states (firstpos) | 
| 1206 |         QList<int> rs; // the right states (lastpos) | 
| 1207 |         QMap<int, int> lanchors; // the left anchors | 
| 1208 |         QMap<int, int> ranchors; // the right anchors | 
| 1209 |         int skipanchors; // the anchors to match if the box is skipped | 
| 1210 |  | 
| 1211 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1212 |         int earlyStart; // the index where str can first occur | 
| 1213 |         int lateStart; // the index where str can last occur | 
| 1214 |         QString str; // a string that has to occur in any match | 
| 1215 |         QString leftStr; // a string occurring at the left of this box | 
| 1216 |         QString rightStr; // a string occurring at the right of this box | 
| 1217 |         int maxl; // the maximum length of this box (possibly InftyLen) | 
| 1218 | #endif | 
| 1219 |  | 
| 1220 |         int minl; // the minimum length of this box | 
| 1221 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1222 |         QList<int> occ1; // first-occurrence array | 
| 1223 | #endif | 
| 1224 |     }; | 
| 1225 |  | 
| 1226 |     friend class Box; | 
| 1227 |  | 
| 1228 |     /* | 
| 1229 |       This is the lexical analyzer for regular expressions. | 
| 1230 |     */ | 
| 1231 |     enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead, | 
| 1232 |            Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar, | 
| 1233 |            Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 }; | 
| 1234 |     int getChar(); | 
| 1235 |     int getEscape(); | 
| 1236 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 1237 |     int getRep(int def); | 
| 1238 | #endif | 
| 1239 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1240 |     void skipChars(int n); | 
| 1241 | #endif | 
| 1242 |     void error(const char *msg); | 
| 1243 |     void startTokenizer(const QChar *rx, int len); | 
| 1244 |     int getToken(); | 
| 1245 |  | 
| 1246 |     const QChar *yyIn; // a pointer to the input regular expression pattern | 
| 1247 |     int yyPos0; // the position of yyTok in the input pattern | 
| 1248 |     int yyPos; // the position of the next character to read | 
| 1249 |     int yyLen; // the length of yyIn | 
| 1250 |     int yyCh; // the last character read | 
| 1251 |     std::optional<QRegExpCharClass> yyCharClass; // attribute for Tok_CharClass tokens | 
| 1252 |     int yyMinRep; // attribute for Tok_Quantifier | 
| 1253 |     int yyMaxRep; // ditto | 
| 1254 |     QString yyError; // syntax error or overflow during parsing? | 
| 1255 |  | 
| 1256 |     /* | 
| 1257 |       This is the syntactic analyzer for regular expressions. | 
| 1258 |     */ | 
| 1259 |     int parse(const QChar *rx, int len); | 
| 1260 |     void parseAtom(Box *box); | 
| 1261 |     void parseFactor(Box *box); | 
| 1262 |     void parseTerm(Box *box); | 
| 1263 |     void parseExpression(Box *box); | 
| 1264 |  | 
| 1265 |     int yyTok; // the last token read | 
| 1266 |     bool yyMayCapture; // set this to false to disable capturing | 
| 1267 |  | 
| 1268 |     friend struct QRegExpMatchState; | 
| 1269 | }; | 
| 1270 |  | 
| 1271 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1272 | /* | 
| 1273 |   The struct QRegExpLookahead represents a lookahead a la Perl (e.g., | 
| 1274 |   (?=foo) and (?!bar)). | 
| 1275 | */ | 
| 1276 | struct QRegExpLookahead | 
| 1277 | { | 
| 1278 |     QRegExpEngine *eng; // NFA representing the embedded regular expression | 
| 1279 |     bool neg; // negative lookahead? | 
| 1280 |  | 
| 1281 |     inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0) | 
| 1282 |         : eng(eng0), neg(neg0) { } | 
| 1283 |     inline ~QRegExpLookahead() { delete eng; } | 
| 1284 | }; | 
| 1285 | #endif | 
| 1286 |  | 
| 1287 | /*! | 
| 1288 |     \internal | 
| 1289 |     convert the pattern string to the RegExp syntax. | 
| 1290 |  | 
| 1291 |     This is also used by QScriptEngine::newRegExp to convert to a pattern that JavaScriptCore can understan | 
| 1292 |  */ | 
| 1293 | Q_CORE5COMPAT_EXPORT QString qt_regexp_toCanonical(const QString &pattern, | 
| 1294 |                                                    QRegExp::PatternSyntax patternSyntax) | 
| 1295 | { | 
| 1296 |     switch (patternSyntax) { | 
| 1297 | #ifndef QT_NO_REGEXP_WILDCARD | 
| 1298 |     case QRegExp::Wildcard: | 
| 1299 |         return wc2rx(wc_str: pattern, enableEscaping: false); | 
| 1300 |     case QRegExp::WildcardUnix: | 
| 1301 |         return wc2rx(wc_str: pattern, enableEscaping: true); | 
| 1302 | #endif | 
| 1303 |     case QRegExp::FixedString: | 
| 1304 |         return QRegExp::escape(str: pattern); | 
| 1305 |     case QRegExp::W3CXmlSchema11: | 
| 1306 |     default: | 
| 1307 |         return pattern; | 
| 1308 |     } | 
| 1309 | } | 
| 1310 |  | 
| 1311 | QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) | 
| 1312 |     : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2), | 
| 1313 |       xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11) | 
| 1314 | { | 
| 1315 |     setup(); | 
| 1316 |  | 
| 1317 |     QString rx = qt_regexp_toCanonical(pattern: key.pattern, patternSyntax: key.patternSyntax); | 
| 1318 |  | 
| 1319 |     valid = (parse(rx: rx.unicode(), len: rx.size()) == rx.size()); | 
| 1320 |     if (!valid) { | 
| 1321 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1322 |         trivial = false; | 
| 1323 | #endif | 
| 1324 |         error(RXERR_LEFTDELIM); | 
| 1325 |     } | 
| 1326 | } | 
| 1327 |  | 
| 1328 | QRegExpEngine::~QRegExpEngine() | 
| 1329 | { | 
| 1330 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1331 |     qDeleteAll(c: ahead); | 
| 1332 | #endif | 
| 1333 | } | 
| 1334 |  | 
| 1335 | void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng) | 
| 1336 | { | 
| 1337 |     /* | 
| 1338 |       We use one QList<int> for all the big data used a lot in | 
| 1339 |       matchHere() and friends. | 
| 1340 |     */ | 
| 1341 |     int ns = eng->s.size(); // number of states | 
| 1342 |     int ncap = eng->ncap; | 
| 1343 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1344 |     int newSlideTabSize = qMax(a: eng->minl + 1, b: 16); | 
| 1345 | #else | 
| 1346 |     int newSlideTabSize = 0; | 
| 1347 | #endif | 
| 1348 |     int numCaptures = eng->captureCount(); | 
| 1349 |     int newCapturedSize = 2 + 2 * numCaptures; | 
| 1350 |     bigArray = q_check_ptr(p: (int *)realloc(ptr: bigArray, size: ((3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize)*sizeof(int))); | 
| 1351 |  | 
| 1352 |     // set all internal variables only _after_ bigArray is realloc'ed | 
| 1353 |     // to prevent a broken regexp in oom case | 
| 1354 |  | 
| 1355 |     slideTabSize = newSlideTabSize; | 
| 1356 |     capturedSize = newCapturedSize; | 
| 1357 |     inNextStack = bigArray; | 
| 1358 |     memset(s: inNextStack, c: -1, n: ns * sizeof(int)); | 
| 1359 |     curStack = inNextStack + ns; | 
| 1360 |     nextStack = inNextStack + 2 * ns; | 
| 1361 |  | 
| 1362 |     curCapBegin = inNextStack + 3 * ns; | 
| 1363 |     nextCapBegin = curCapBegin + ncap * ns; | 
| 1364 |     curCapEnd = curCapBegin + 2 * ncap * ns; | 
| 1365 |     nextCapEnd = curCapBegin + 3 * ncap * ns; | 
| 1366 |  | 
| 1367 |     tempCapBegin = curCapBegin + 4 * ncap * ns; | 
| 1368 |     tempCapEnd = tempCapBegin + ncap; | 
| 1369 |     capBegin = tempCapBegin + 2 * ncap; | 
| 1370 |     capEnd = tempCapBegin + 3 * ncap; | 
| 1371 |  | 
| 1372 |     slideTab = tempCapBegin + 4 * ncap; | 
| 1373 |     captured = slideTab + slideTabSize; | 
| 1374 |     memset(s: captured, c: -1, n: capturedSize*sizeof(int)); | 
| 1375 |     this->eng = eng; | 
| 1376 | } | 
| 1377 |  | 
| 1378 | /* | 
| 1379 |   Tries to match in str and returns an array of (begin, length) pairs | 
| 1380 |   for captured text. If there is no match, all pairs are (-1, -1). | 
| 1381 | */ | 
| 1382 | void QRegExpMatchState::match(const QChar *str0, int len0, int pos0, | 
| 1383 |     bool minimal0, bool oneTest, int caretIndex) | 
| 1384 | { | 
| 1385 |     bool matched = false; | 
| 1386 |     QChar char_null; | 
| 1387 |  | 
| 1388 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1389 |     if (eng->trivial && !oneTest) { | 
| 1390 |         // ### Qt6: qsizetype | 
| 1391 |         pos = int(QtPrivate::findString(haystack: QStringView(str0, len0), from: pos0, needle: QStringView(eng->goodStr.unicode(), eng->goodStr.size()), cs: eng->cs)); | 
| 1392 |         matchLen = eng->goodStr.size(); | 
| 1393 |         matched = (pos != -1); | 
| 1394 |     } else | 
| 1395 | #endif | 
| 1396 |     { | 
| 1397 |         in = str0; | 
| 1398 |         if (in == nullptr) | 
| 1399 |             in = &char_null; | 
| 1400 |         pos = pos0; | 
| 1401 |         caretPos = caretIndex; | 
| 1402 |         len = len0; | 
| 1403 |         minimal = minimal0; | 
| 1404 |         matchLen = 0; | 
| 1405 |         oneTestMatchedLen = 0; | 
| 1406 |  | 
| 1407 |         if (eng->valid && pos >= 0 && pos <= len) { | 
| 1408 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1409 |             if (oneTest) { | 
| 1410 |                 matched = matchHere(); | 
| 1411 |             } else { | 
| 1412 |                 if (pos <= len - eng->minl) { | 
| 1413 |                     if (eng->caretAnchored) { | 
| 1414 |                         matched = matchHere(); | 
| 1415 |                     } else if (eng->useGoodStringHeuristic) { | 
| 1416 |                         matched = eng->goodStringMatch(matchState&: *this); | 
| 1417 |                     } else { | 
| 1418 |                         matched = eng->badCharMatch(matchState&: *this); | 
| 1419 |                     } | 
| 1420 |                 } | 
| 1421 |             } | 
| 1422 | #else | 
| 1423 |             matched = oneTest ? matchHere() : eng->bruteMatch(*this); | 
| 1424 | #endif | 
| 1425 |         } | 
| 1426 |     } | 
| 1427 |  | 
| 1428 |     if (matched) { | 
| 1429 |         int *c = captured; | 
| 1430 |         *c++ = pos; | 
| 1431 |         *c++ = matchLen; | 
| 1432 |  | 
| 1433 |         int numCaptures = (capturedSize - 2) >> 1; | 
| 1434 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1435 |         for (int i = 0; i < numCaptures; ++i) { | 
| 1436 |             int j = eng->captureForOfficialCapture.at(i); | 
| 1437 |             if (capBegin[j] != EmptyCapture) { | 
| 1438 |                 int len = capEnd[j] - capBegin[j]; | 
| 1439 |                 *c++ = (len > 0) ? pos + capBegin[j] : 0; | 
| 1440 |                 *c++ = len; | 
| 1441 |             } else { | 
| 1442 |                 *c++ = -1; | 
| 1443 |                 *c++ = -1; | 
| 1444 |             } | 
| 1445 |         } | 
| 1446 | #endif | 
| 1447 |     } else { | 
| 1448 |         // we rely on 2's complement here | 
| 1449 |         memset(s: captured, c: -1, n: capturedSize * sizeof(int)); | 
| 1450 |     } | 
| 1451 | } | 
| 1452 |  | 
| 1453 | /* | 
| 1454 |   The three following functions add one state to the automaton and | 
| 1455 |   return the number of the state. | 
| 1456 | */ | 
| 1457 |  | 
| 1458 | int QRegExpEngine::createState(QChar ch) | 
| 1459 | { | 
| 1460 |     return setupState(ch.unicode()); | 
| 1461 | } | 
| 1462 |  | 
| 1463 | int QRegExpEngine::createState(const QRegExpCharClass &cc) | 
| 1464 | { | 
| 1465 | #ifndef QT_NO_REGEXP_CCLASS | 
| 1466 |     int n = cl.size(); | 
| 1467 |     cl += QRegExpCharClass(cc); | 
| 1468 |     return setupState(CharClassBit | n); | 
| 1469 | #else | 
| 1470 |     Q_UNUSED(cc); | 
| 1471 |     return setupState(CharClassBit); | 
| 1472 | #endif | 
| 1473 | } | 
| 1474 |  | 
| 1475 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1476 | int QRegExpEngine::createState(int bref) | 
| 1477 | { | 
| 1478 |     if (bref > nbrefs) { | 
| 1479 |         nbrefs = bref; | 
| 1480 |         if (nbrefs > MaxBackRefs) { | 
| 1481 |             error(RXERR_LIMIT); | 
| 1482 |             return 0; | 
| 1483 |         } | 
| 1484 |     } | 
| 1485 |     return setupState(BackRefBit | bref); | 
| 1486 | } | 
| 1487 | #endif | 
| 1488 |  | 
| 1489 | /* | 
| 1490 |   The two following functions add a transition between all pairs of | 
| 1491 |   states (i, j) where i is found in from, and j is found in to. | 
| 1492 |  | 
| 1493 |   Cat-transitions are distinguished from plus-transitions for | 
| 1494 |   capturing. | 
| 1495 | */ | 
| 1496 |  | 
| 1497 | void QRegExpEngine::addCatTransitions(const QList<int> &from, const QList<int> &to) | 
| 1498 | { | 
| 1499 |     for (int i = 0; i < from.size(); i++) | 
| 1500 |         mergeInto(a: &s[from.at(i)].outs, b: to); | 
| 1501 | } | 
| 1502 |  | 
| 1503 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1504 | void QRegExpEngine::addPlusTransitions(const QList<int> &from, const QList<int> &to, int atom) | 
| 1505 | { | 
| 1506 |     for (int i = 0; i < from.size(); i++) { | 
| 1507 |         QRegExpAutomatonState &st = s[from.at(i)]; | 
| 1508 |         const QList<int> oldOuts = st.outs; | 
| 1509 |         mergeInto(a: &st.outs, b: to); | 
| 1510 |         if (f.at(i: atom).capture != QRegExpAtom::NoCapture) { | 
| 1511 |             for (int j = 0; j < to.size(); j++) { | 
| 1512 |                 // ### st.reenter.contains(to.at(j)) check looks suspicious | 
| 1513 |                 if (!st.reenter.contains(key: to.at(i: j)) && | 
| 1514 |                      !std::binary_search(first: oldOuts.constBegin(), last: oldOuts.constEnd(), val: to.at(i: j))) | 
| 1515 |                     st.reenter.insert(key: to.at(i: j), value: atom); | 
| 1516 |             } | 
| 1517 |         } | 
| 1518 |     } | 
| 1519 | } | 
| 1520 | #endif | 
| 1521 |  | 
| 1522 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 1523 | /* | 
| 1524 |   Returns an anchor that means a OR b. | 
| 1525 | */ | 
| 1526 | int QRegExpEngine::anchorAlternation(int a, int b) | 
| 1527 | { | 
| 1528 |     if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0) | 
| 1529 |         return a & b; | 
| 1530 |  | 
| 1531 |     int n = aa.size(); | 
| 1532 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1533 |     if (n > 0 && aa.at(i: n - 1).a == a && aa.at(i: n - 1).b == b) | 
| 1534 |         return Anchor_Alternation | (n - 1); | 
| 1535 | #endif | 
| 1536 |  | 
| 1537 |     QRegExpAnchorAlternation element = {.a: a, .b: b}; | 
| 1538 |     aa.append(t: element); | 
| 1539 |     return Anchor_Alternation | n; | 
| 1540 | } | 
| 1541 |  | 
| 1542 | /* | 
| 1543 |   Returns an anchor that means a AND b. | 
| 1544 | */ | 
| 1545 | int QRegExpEngine::anchorConcatenation(int a, int b) | 
| 1546 | { | 
| 1547 |     if (((a | b) & Anchor_Alternation) == 0) | 
| 1548 |         return a | b; | 
| 1549 |     if ((b & Anchor_Alternation) != 0) | 
| 1550 |         qSwap(value1&: a, value2&: b); | 
| 1551 |  | 
| 1552 |     int aprime = anchorConcatenation(a: aa.at(i: a ^ Anchor_Alternation).a, b); | 
| 1553 |     int bprime = anchorConcatenation(a: aa.at(i: a ^ Anchor_Alternation).b, b); | 
| 1554 |     return anchorAlternation(a: aprime, b: bprime); | 
| 1555 | } | 
| 1556 | #endif | 
| 1557 |  | 
| 1558 | /* | 
| 1559 |   Adds anchor a on a transition caracterised by its from state and | 
| 1560 |   its to state. | 
| 1561 | */ | 
| 1562 | void QRegExpEngine::addAnchors(int from, int to, int a) | 
| 1563 | { | 
| 1564 |     QRegExpAutomatonState &st = s[from]; | 
| 1565 |     if (st.anchors.contains(key: to)) | 
| 1566 |         a = anchorAlternation(a: st.anchors.value(key: to), b: a); | 
| 1567 |     st.anchors.insert(key: to, value: a); | 
| 1568 | } | 
| 1569 |  | 
| 1570 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1571 | /* | 
| 1572 |   This function chooses between the good-string and the bad-character | 
| 1573 |   heuristics. It computes two scores and chooses the heuristic with | 
| 1574 |   the highest score. | 
| 1575 |  | 
| 1576 |   Here are some common-sense constraints on the scores that should be | 
| 1577 |   respected if the formulas are ever modified: (1) If goodStr is | 
| 1578 |   empty, the good-string heuristic scores 0. (2) If the regular | 
| 1579 |   expression is trivial, the good-string heuristic should be used. | 
| 1580 |   (3) If the search is case insensitive, the good-string heuristic | 
| 1581 |   should be used, unless it scores 0. (Case insensitivity turns all | 
| 1582 |   entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is | 
| 1583 |   big, the good-string heuristic should score less. | 
| 1584 | */ | 
| 1585 | void QRegExpEngine::heuristicallyChooseHeuristic() | 
| 1586 | { | 
| 1587 |     if (minl == 0) { | 
| 1588 |         useGoodStringHeuristic = false; | 
| 1589 |     } else if (trivial) { | 
| 1590 |         useGoodStringHeuristic = true; | 
| 1591 |     } else { | 
| 1592 |         /* | 
| 1593 |           Magic formula: The good string has to constitute a good | 
| 1594 |           proportion of the minimum-length string, and appear at a | 
| 1595 |           more-or-less known index. | 
| 1596 |         */ | 
| 1597 |         int goodStringScore = (64 * goodStr.size() / minl) - | 
| 1598 |                               (goodLateStart - goodEarlyStart); | 
| 1599 |         /* | 
| 1600 |           Less magic formula: We pick some characters at random, and | 
| 1601 |           check whether they are good or bad. | 
| 1602 |         */ | 
| 1603 |         int badCharScore = 0; | 
| 1604 |         int step = qMax(a: 1, b: NumBadChars / 32); | 
| 1605 |         for (int i = 1; i < NumBadChars; i += step) { | 
| 1606 |             if (occ1.at(i) == NoOccurrence) | 
| 1607 |                 badCharScore += minl; | 
| 1608 |             else | 
| 1609 |                 badCharScore += occ1.at(i); | 
| 1610 |         } | 
| 1611 |         badCharScore /= minl; | 
| 1612 |         useGoodStringHeuristic = (goodStringScore > badCharScore); | 
| 1613 |     } | 
| 1614 | } | 
| 1615 | #endif | 
| 1616 |  | 
#if defined(QT_DEBUG)
/*
  Debugging aid: prints the automaton with qDebug().

  Output, in order: the case sensitivity, every state (what it matches
  and its outgoing transitions with their reenter and anchor entries),
  the atom table (when capturing is compiled in and atoms exist), and
  the anchor alternation table.
*/
void QRegExpEngine::dump() const
{
    int i, j;
    qDebug("Case %ssensitive engine", cs ? "" : "in");
    qDebug("  States");
    for (i = 0; i < s.size(); i++) {
        qDebug("  %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");
#ifndef QT_NO_REGEXP_CAPTURE
        if (nf > 0)
            qDebug("    in atom %d", s[i].atom);
#endif
        // match is either a plain character code, or an index tagged with
        // CharClassBit or BackRefBit.
        int m = s[i].match;
        if ((m & CharClassBit) != 0) {
            qDebug("    match character class %d", m ^ CharClassBit);
#ifndef QT_NO_REGEXP_CCLASS
            cl[m ^ CharClassBit].dump();
#else
            qDebug("    negative character class");
#endif
        } else if ((m & BackRefBit) != 0) {
            qDebug("    match back-reference %d", m ^ BackRefBit);
        } else if (m >= 0x20 && m <= 0x7e) {
            // printable ASCII: show the character as well
            qDebug("    match 0x%.4x (%c)", m, m);
        } else {
            qDebug("    match 0x%.4x", m);
        }
        for (j = 0; j < s[i].outs.size(); j++) {
            int next = s[i].outs[j];
            qDebug("    -> %d", next);
            if (s[i].reenter.contains(next))
                qDebug("       [reenter %d]", s[i].reenter[next]);
            if (s[i].anchors.value(next) != 0)
                qDebug("       [anchors 0x%.8x]", s[i].anchors[next]);
        }
    }
#ifndef QT_NO_REGEXP_CAPTURE
    if (nf > 0) {
        qDebug("  Atom    Parent  Capture");
        for (i = 0; i < nf; i++) {
            if (f[i].capture == QRegExpAtom::NoCapture) {
                qDebug("  %6d  %6d     nil", i, f[i].parent);
            } else {
                int cap = f[i].capture;
                bool official = captureForOfficialCapture.contains(cap);
                qDebug("  %6d  %6d  %6d  %s", i, f[i].parent, cap,
                       official ? "official" : "");
            }
        }
    }
#endif
#ifndef QT_NO_REGEXP_ANCHOR_ALT
    // Both members are printed with the same 8-digit width used for anchors
    // above (the second one used to be printed with "%.9x").
    for (i = 0; i < aa.size(); i++)
        qDebug("  Anchor alternation 0x%.8x: 0x%.8x 0x%.8x", i, aa[i].a, aa[i].b);
#endif
}
#endif
| 1674 |  | 
| 1675 | void QRegExpEngine::setup() | 
| 1676 | { | 
| 1677 |     ref.storeRelaxed(newValue: 1); | 
| 1678 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1679 |     f.resize(size: 32); | 
| 1680 |     nf = 0; | 
| 1681 |     cf = -1; | 
| 1682 | #endif | 
| 1683 |     officialncap = 0; | 
| 1684 |     ncap = 0; | 
| 1685 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1686 |     caretAnchored = true; | 
| 1687 |     trivial = true; | 
| 1688 | #endif | 
| 1689 |     valid = false; | 
| 1690 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1691 |     nbrefs = 0; | 
| 1692 | #endif | 
| 1693 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1694 |     useGoodStringHeuristic = true; | 
| 1695 |     minl = 0; | 
| 1696 |     occ1.fill(t: 0, newSize: NumBadChars); | 
| 1697 | #endif | 
| 1698 | } | 
| 1699 |  | 
| 1700 | int QRegExpEngine::setupState(int match) | 
| 1701 | { | 
| 1702 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1703 |     s += QRegExpAutomatonState(cf, match); | 
| 1704 | #else | 
| 1705 |     s += QRegExpAutomatonState(match); | 
| 1706 | #endif | 
| 1707 |     return s.size() - 1; | 
| 1708 | } | 
| 1709 |  | 
| 1710 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1711 | /* | 
| 1712 |   Functions startAtom() and finishAtom() should be called to delimit | 
| 1713 |   atoms. When a state is created, it is assigned to the current atom. | 
| 1714 |   The information is later used for capturing. | 
| 1715 | */ | 
| 1716 | int QRegExpEngine::startAtom(bool officialCapture) | 
| 1717 | { | 
| 1718 |     if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) | 
| 1719 |         f.resize(size: (nf + 1) << 1); | 
| 1720 |     f[nf].parent = cf; | 
| 1721 |     cf = nf++; | 
| 1722 |     f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture; | 
| 1723 |     return cf; | 
| 1724 | } | 
| 1725 |  | 
| 1726 | void QRegExpEngine::finishAtom(int atom, bool needCapture) | 
| 1727 | { | 
| 1728 |     if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture) | 
| 1729 |         f[atom].capture = QRegExpAtom::UnofficialCapture; | 
| 1730 |     cf = f.at(i: atom).parent; | 
| 1731 | } | 
| 1732 | #endif | 
| 1733 |  | 
| 1734 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1735 | /* | 
| 1736 |   Creates a lookahead anchor. | 
| 1737 | */ | 
| 1738 | int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative) | 
| 1739 | { | 
| 1740 |     int n = ahead.size(); | 
| 1741 |     if (n == MaxLookaheads) { | 
| 1742 |         error(RXERR_LIMIT); | 
| 1743 |         return 0; | 
| 1744 |     } | 
| 1745 |     ahead += new QRegExpLookahead(eng, negative); | 
| 1746 |     return Anchor_FirstLookahead << n; | 
| 1747 | } | 
| 1748 | #endif | 
| 1749 |  | 
#ifndef QT_NO_REGEXP_CAPTURE
/*
  We want the longest leftmost captures.

  Compares capture set 1 with capture set 2, one capture at a time, and
  returns true if set 1 is better: at the first capture where the two
  differ, set 1 must start earlier or, with equal starts, end later.
  Returns false if the sets are identical.
*/
static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2,
                            const int *end2)
{
    for (int slot = 0; slot < ncap; ++slot) {
        if (begin1[slot] != begin2[slot])
            return begin1[slot] < begin2[slot]; // it has to start early...
        if (end1[slot] != end2[slot])
            return end1[slot] > end2[slot]; // ...and end late
    }
    return false;
}
#endif
| 1768 |  | 
| 1769 | /* | 
| 1770 |   Returns \c true if anchor a matches at position pos + i in the input | 
| 1771 |   string, otherwise false. | 
| 1772 | */ | 
| 1773 | bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin) | 
| 1774 | { | 
| 1775 |     int j; | 
| 1776 |  | 
| 1777 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 1778 |     if ((a & QRegExpEngine::Anchor_Alternation) != 0) | 
| 1779 |         return testAnchor(i, a: eng->aa.at(i: a ^ QRegExpEngine::Anchor_Alternation).a, capBegin) | 
| 1780 |                || testAnchor(i, a: eng->aa.at(i: a ^ QRegExpEngine::Anchor_Alternation).b, capBegin); | 
| 1781 | #endif | 
| 1782 |  | 
| 1783 |     if ((a & QRegExpEngine::Anchor_Caret) != 0) { | 
| 1784 |         if (pos + i != caretPos) | 
| 1785 |             return false; | 
| 1786 |     } | 
| 1787 |     if ((a & QRegExpEngine::Anchor_Dollar) != 0) { | 
| 1788 |         if (pos + i != len) | 
| 1789 |             return false; | 
| 1790 |     } | 
| 1791 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 1792 |     if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) { | 
| 1793 |         bool before = false; | 
| 1794 |         bool after = false; | 
| 1795 |         if (pos + i != 0) | 
| 1796 |             before = isWord(ch: in[pos + i - 1]); | 
| 1797 |         if (pos + i != len) | 
| 1798 |             after = isWord(ch: in[pos + i]); | 
| 1799 |         if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after)) | 
| 1800 |             return false; | 
| 1801 |         if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after)) | 
| 1802 |             return false; | 
| 1803 |     } | 
| 1804 | #endif | 
| 1805 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 1806 |     if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) { | 
| 1807 |         const QList<QRegExpLookahead *> &ahead = eng->ahead; | 
| 1808 |         for (j = 0; j < ahead.size(); j++) { | 
| 1809 |             if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) { | 
| 1810 |                 QRegExpMatchState matchState; | 
| 1811 |                 matchState.prepareForMatch(eng: ahead[j]->eng); | 
| 1812 |                 matchState.match(str0: in + pos + i, len0: len - pos - i, pos0: 0, | 
| 1813 |                     minimal0: true, oneTest: true, caretIndex: caretPos - pos - i); | 
| 1814 |                 if ((matchState.captured[0] == 0) == ahead[j]->neg) | 
| 1815 |                     return false; | 
| 1816 |             } | 
| 1817 |         } | 
| 1818 |     } | 
| 1819 | #endif | 
| 1820 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1821 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1822 |     for (j = 0; j < eng->nbrefs; j++) { | 
| 1823 |         if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) { | 
| 1824 |             int i = eng->captureForOfficialCapture.at(i: j); | 
| 1825 |             if (capBegin[i] != EmptyCapture) | 
| 1826 |                 return false; | 
| 1827 |         } | 
| 1828 |     } | 
| 1829 | #endif | 
| 1830 | #endif | 
| 1831 |     return true; | 
| 1832 | } | 
| 1833 |  | 
| 1834 | #ifndef QT_NO_REGEXP_OPTIM | 
| 1835 | /* | 
| 1836 |   The three following functions are what Jeffrey Friedl would call | 
| 1837 |   transmissions (or bump-alongs). Using one or the other should make | 
| 1838 |   no difference except in performance. | 
| 1839 | */ | 
| 1840 |  | 
| 1841 | bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const | 
| 1842 | { | 
| 1843 |     int k = matchState.pos + goodEarlyStart; | 
| 1844 |     QStringMatcher matcher(goodStr.unicode(), goodStr.size(), cs); | 
| 1845 |     while ((k = matcher.indexIn(str: matchState.in, length: matchState.len, from: k)) != -1) { | 
| 1846 |         int from = k - goodLateStart; | 
| 1847 |         int to = k - goodEarlyStart; | 
| 1848 |         if (from > matchState.pos) | 
| 1849 |             matchState.pos = from; | 
| 1850 |  | 
| 1851 |         while (matchState.pos <= to) { | 
| 1852 |             if (matchState.matchHere()) | 
| 1853 |                 return true; | 
| 1854 |             ++matchState.pos; | 
| 1855 |         } | 
| 1856 |         ++k; | 
| 1857 |     } | 
| 1858 |     return false; | 
| 1859 | } | 
| 1860 |  | 
| 1861 | bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const | 
| 1862 | { | 
| 1863 |     int slideHead = 0; | 
| 1864 |     int slideNext = 0; | 
| 1865 |     int i; | 
| 1866 |     int lastPos = matchState.len - minl; | 
| 1867 |     memset(s: matchState.slideTab, c: 0, n: matchState.slideTabSize * sizeof(int)); | 
| 1868 |  | 
| 1869 |     /* | 
| 1870 |       Set up the slide table, used for the bad-character heuristic, | 
| 1871 |       using the table of first occurrence of each character. | 
| 1872 |     */ | 
| 1873 |     for (i = 0; i < minl; i++) { | 
| 1874 |         int sk = occ1[BadChar(matchState.in[matchState.pos + i])]; | 
| 1875 |         if (sk == NoOccurrence) | 
| 1876 |             sk = i + 1; | 
| 1877 |         if (sk > 0) { | 
| 1878 |             int k = i + 1 - sk; | 
| 1879 |             if (k < 0) { | 
| 1880 |                 sk = i + 1; | 
| 1881 |                 k = 0; | 
| 1882 |             } | 
| 1883 |             if (sk > matchState.slideTab[k]) | 
| 1884 |                 matchState.slideTab[k] = sk; | 
| 1885 |         } | 
| 1886 |     } | 
| 1887 |  | 
| 1888 |     if (matchState.pos > lastPos) | 
| 1889 |         return false; | 
| 1890 |  | 
| 1891 |     for (;;) { | 
| 1892 |         if (++slideNext >= matchState.slideTabSize) | 
| 1893 |             slideNext = 0; | 
| 1894 |         if (matchState.slideTab[slideHead] > 0) { | 
| 1895 |             if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext]) | 
| 1896 |                 matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1; | 
| 1897 |             matchState.slideTab[slideHead] = 0; | 
| 1898 |         } else { | 
| 1899 |             if (matchState.matchHere()) | 
| 1900 |                 return true; | 
| 1901 |         } | 
| 1902 |  | 
| 1903 |         if (matchState.pos == lastPos) | 
| 1904 |             break; | 
| 1905 |  | 
| 1906 |         /* | 
| 1907 |           Update the slide table. This code has much in common with | 
| 1908 |           the initialization code. | 
| 1909 |         */ | 
| 1910 |         int sk = occ1[BadChar(matchState.in[matchState.pos + minl])]; | 
| 1911 |         if (sk == NoOccurrence) { | 
| 1912 |             matchState.slideTab[slideNext] = minl; | 
| 1913 |         } else if (sk > 0) { | 
| 1914 |             int k = slideNext + minl - sk; | 
| 1915 |             if (k >= matchState.slideTabSize) | 
| 1916 |                 k -= matchState.slideTabSize; | 
| 1917 |             if (sk > matchState.slideTab[k]) | 
| 1918 |                 matchState.slideTab[k] = sk; | 
| 1919 |         } | 
| 1920 |         slideHead = slideNext; | 
| 1921 |         ++matchState.pos; | 
| 1922 |     } | 
| 1923 |     return false; | 
| 1924 | } | 
| 1925 | #else | 
| 1926 | bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const | 
| 1927 | { | 
| 1928 |     while (matchState.pos <= matchState.len) { | 
| 1929 |         if (matchState.matchHere()) | 
| 1930 |             return true; | 
| 1931 |         ++matchState.pos; | 
| 1932 |     } | 
| 1933 |     return false; | 
| 1934 | } | 
| 1935 | #endif | 
| 1936 |  | 
| 1937 | /* | 
| 1938 |   Here's the core of the engine. It tries to do a match here and now. | 
| 1939 | */ | 
| 1940 | bool QRegExpMatchState::matchHere() | 
| 1941 | { | 
| 1942 |     int ncur = 1, nnext = 0; | 
| 1943 |     int i = 0, j, k, m; | 
| 1944 |     bool stop = false; | 
| 1945 |  | 
| 1946 |     matchLen = -1; | 
| 1947 |     oneTestMatchedLen = -1; | 
| 1948 |     curStack[0] = QRegExpEngine::InitialState; | 
| 1949 |  | 
| 1950 |     int ncap = eng->ncap; | 
| 1951 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 1952 |     if (ncap > 0) { | 
| 1953 |         for (j = 0; j < ncap; j++) { | 
| 1954 |             curCapBegin[j] = EmptyCapture; | 
| 1955 |             curCapEnd[j] = EmptyCapture; | 
| 1956 |         } | 
| 1957 |     } | 
| 1958 | #endif | 
| 1959 |  | 
| 1960 | #ifndef QT_NO_REGEXP_BACKREF | 
| 1961 |     while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop) | 
| 1962 | #else | 
| 1963 |     while (ncur > 0 && i <= len - pos && !stop) | 
| 1964 | #endif | 
| 1965 |     { | 
| 1966 |         int ch = (i < len - pos) ? in[pos + i].unicode() : 0; | 
| 1967 |         for (j = 0; j < ncur; j++) { | 
| 1968 |             int cur = curStack[j]; | 
| 1969 |             const QRegExpAutomatonState &scur = eng->s.at(i: cur); | 
| 1970 |             const QList<int> &outs = scur.outs; | 
| 1971 |             for (k = 0; k < outs.size(); k++) { | 
| 1972 |                 int next = outs.at(i: k); | 
| 1973 |                 const QRegExpAutomatonState &snext = eng->s.at(i: next); | 
| 1974 |                 bool inside = true; | 
| 1975 | #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) | 
| 1976 |                 int needSomeSleep = 0; | 
| 1977 | #endif | 
| 1978 |  | 
| 1979 |                 /* | 
| 1980 |                   First, check if the anchors are anchored properly. | 
| 1981 |                 */ | 
| 1982 |                 int a = scur.anchors.value(key: next); | 
| 1983 |                 if (a != 0 && !testAnchor(i, a, capBegin: curCapBegin + j * ncap)) | 
| 1984 |                     inside = false; | 
| 1985 |  | 
| 1986 |                 /* | 
| 1987 |                   If indeed they are, check if the input character is | 
| 1988 |                   correct for this transition. | 
| 1989 |                 */ | 
| 1990 |                 if (inside) { | 
| 1991 |                     m = snext.match; | 
| 1992 |                     if ((m & (QRegExpEngine::CharClassBit | QRegExpEngine::BackRefBit)) == 0) { | 
| 1993 |                         if (eng->cs) | 
| 1994 |                             inside = (m == ch); | 
| 1995 |                         else | 
| 1996 |                             inside = (QChar(m).toLower() == QChar(ch).toLower()); | 
| 1997 |                     } else if (next == QRegExpEngine::FinalState) { | 
| 1998 |                         matchLen = i; | 
| 1999 |                         stop = minimal; | 
| 2000 |                         inside = true; | 
| 2001 |                     } else if ((m & QRegExpEngine::CharClassBit) != 0) { | 
| 2002 | #ifndef QT_NO_REGEXP_CCLASS | 
| 2003 |                         const QRegExpCharClass &cc = eng->cl.at(i: m ^ QRegExpEngine::CharClassBit); | 
| 2004 |                         if (eng->cs) | 
| 2005 |                             inside = cc.in(ch: QChar(ch)); | 
| 2006 |                         else if (cc.negative()) | 
| 2007 |                             inside = cc.in(ch: QChar(ch).toLower()) && | 
| 2008 |                                      cc.in(ch: QChar(ch).toUpper()); | 
| 2009 |                         else | 
| 2010 |                             inside = cc.in(ch: QChar(ch).toLower()) || | 
| 2011 |                                      cc.in(ch: QChar(ch).toUpper()); | 
| 2012 | #endif | 
| 2013 | #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) | 
| 2014 |                     } else { /* ((m & QRegExpEngine::BackRefBit) != 0) */ | 
| 2015 |                         int bref = m ^ QRegExpEngine::BackRefBit; | 
| 2016 |                         int ell = j * ncap + eng->captureForOfficialCapture.at(i: bref - 1); | 
| 2017 |  | 
| 2018 |                         inside = bref <= ncap && curCapBegin[ell] != EmptyCapture; | 
| 2019 |                         if (inside) { | 
| 2020 |                             if (eng->cs) | 
| 2021 |                                 inside = (in[pos + curCapBegin[ell]] == QChar(ch)); | 
| 2022 |                             else | 
| 2023 |                                 inside = (in[pos + curCapBegin[ell]].toLower() | 
| 2024 |                                        == QChar(ch).toLower()); | 
| 2025 |                         } | 
| 2026 |  | 
| 2027 |                         if (inside) { | 
| 2028 |                             int delta; | 
| 2029 |                             if (curCapEnd[ell] == EmptyCapture) | 
| 2030 |                                 delta = i - curCapBegin[ell]; | 
| 2031 |                             else | 
| 2032 |                                 delta = curCapEnd[ell] - curCapBegin[ell]; | 
| 2033 |  | 
| 2034 |                             inside = (delta <= len - (pos + i)); | 
| 2035 |                             if (inside && delta > 1) { | 
| 2036 |                                 int n = 1; | 
| 2037 |                                 if (eng->cs) { | 
| 2038 |                                     while (n < delta) { | 
| 2039 |                                         if (in[pos + curCapBegin[ell] + n] | 
| 2040 |                                             != in[pos + i + n]) | 
| 2041 |                                             break; | 
| 2042 |                                         ++n; | 
| 2043 |                                     } | 
| 2044 |                                 } else { | 
| 2045 |                                     while (n < delta) { | 
| 2046 |                                         QChar a = in[pos + curCapBegin[ell] + n]; | 
| 2047 |                                         QChar b = in[pos + i + n]; | 
| 2048 |                                         if (a.toLower() != b.toLower()) | 
| 2049 |                                             break; | 
| 2050 |                                         ++n; | 
| 2051 |                                     } | 
| 2052 |                                 } | 
| 2053 |                                 inside = (n == delta); | 
| 2054 |                                 if (inside) | 
| 2055 |                                     needSomeSleep = delta - 1; | 
| 2056 |                             } | 
| 2057 |                         } | 
| 2058 | #endif | 
| 2059 |                     } | 
| 2060 |                 } | 
| 2061 |  | 
| 2062 |                 /* | 
| 2063 |                   We must now update our data structures. | 
| 2064 |                 */ | 
| 2065 |                 if (inside) { | 
| 2066 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2067 |                     int *capBegin, *capEnd; | 
| 2068 | #endif | 
| 2069 |                     /* | 
| 2070 |                       If the next state was not encountered yet, all | 
| 2071 |                       is fine. | 
| 2072 |                     */ | 
| 2073 |                     if ((m = inNextStack[next]) == -1) { | 
| 2074 |                         m = nnext++; | 
| 2075 |                         nextStack[m] = next; | 
| 2076 |                         inNextStack[next] = m; | 
| 2077 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2078 |                         capBegin = nextCapBegin + m * ncap; | 
| 2079 |                         capEnd = nextCapEnd + m * ncap; | 
| 2080 |  | 
| 2081 |                     /* | 
| 2082 |                       Otherwise, we'll first maintain captures in | 
| 2083 |                       temporary arrays, and decide at the end whether | 
| 2084 |                       it's best to keep the previous capture zones or | 
| 2085 |                       the new ones. | 
| 2086 |                     */ | 
| 2087 |                     } else { | 
| 2088 |                         capBegin = tempCapBegin; | 
| 2089 |                         capEnd = tempCapEnd; | 
| 2090 | #endif | 
| 2091 |                     } | 
| 2092 |  | 
| 2093 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2094 |                     /* | 
| 2095 |                       Updating the capture zones is much of a task. | 
| 2096 |                     */ | 
| 2097 |                     if (ncap > 0) { | 
| 2098 |                         memcpy(dest: capBegin, src: curCapBegin + j * ncap, n: ncap * sizeof(int)); | 
| 2099 |                         memcpy(dest: capEnd, src: curCapEnd + j * ncap, n: ncap * sizeof(int)); | 
| 2100 |                         int c = scur.atom, n = snext.atom; | 
| 2101 |                         int p = -1, q = -1; | 
| 2102 |                         int cap; | 
| 2103 |  | 
| 2104 |                         /* | 
| 2105 |                           Lemma 1. For any x in the range [0..nf), we | 
| 2106 |                           have f[x].parent < x. | 
| 2107 |  | 
| 2108 |                           Proof. By looking at startAtom(), it is | 
| 2109 |                           clear that cf < nf holds all the time, and | 
| 2110 |                           thus that f[nf].parent < nf. | 
| 2111 |                         */ | 
| 2112 |  | 
| 2113 |                         /* | 
| 2114 |                           If we are reentering an atom, we empty all | 
| 2115 |                           capture zones inside it. | 
| 2116 |                         */ | 
| 2117 |                         if ((q = scur.reenter.value(key: next)) != 0) { | 
| 2118 |                             QBitArray b(eng->nf, false); | 
| 2119 |                             b.setBit(i: q, val: true); | 
| 2120 |                             for (int ell = q + 1; ell < eng->nf; ell++) { | 
| 2121 |                                 if (b.testBit(i: eng->f.at(i: ell).parent)) { | 
| 2122 |                                     b.setBit(i: ell, val: true); | 
| 2123 |                                     cap = eng->f.at(i: ell).capture; | 
| 2124 |                                     if (cap >= 0) { | 
| 2125 |                                         capBegin[cap] = EmptyCapture; | 
| 2126 |                                         capEnd[cap] = EmptyCapture; | 
| 2127 |                                     } | 
| 2128 |                                 } | 
| 2129 |                             } | 
| 2130 |                             p = eng->f.at(i: q).parent; | 
| 2131 |  | 
| 2132 |                         /* | 
| 2133 |                           Otherwise, close the capture zones we are | 
| 2134 |                           leaving. We are leaving f[c].capture, | 
| 2135 |                           f[f[c].parent].capture, | 
| 2136 |                           f[f[f[c].parent].parent].capture, ..., | 
| 2137 |                           until f[x].capture, with x such that | 
| 2138 |                           f[x].parent is the youngest common ancestor | 
| 2139 |                           for c and n. | 
| 2140 |  | 
| 2141 |                           We go up along c's and n's ancestry until | 
| 2142 |                           we find x. | 
| 2143 |                         */ | 
| 2144 |                         } else { | 
| 2145 |                             p = c; | 
| 2146 |                             q = n; | 
| 2147 |                             while (p != q) { | 
| 2148 |                                 if (p > q) { | 
| 2149 |                                     cap = eng->f.at(i: p).capture; | 
| 2150 |                                     if (cap >= 0) { | 
| 2151 |                                         if (capBegin[cap] == i) { | 
| 2152 |                                             capBegin[cap] = EmptyCapture; | 
| 2153 |                                             capEnd[cap] = EmptyCapture; | 
| 2154 |                                         } else { | 
| 2155 |                                             capEnd[cap] = i; | 
| 2156 |                                         } | 
| 2157 |                                     } | 
| 2158 |                                     p = eng->f.at(i: p).parent; | 
| 2159 |                                 } else { | 
| 2160 |                                     q = eng->f.at(i: q).parent; | 
| 2161 |                                 } | 
| 2162 |                             } | 
| 2163 |                         } | 
| 2164 |  | 
| 2165 |                         /* | 
| 2166 |                           In any case, we now open the capture zones | 
| 2167 |                           we are entering. We work upwards from n | 
| 2168 |                           until we reach p (the parent of the atom we | 
| 2169 |                           reenter or the youngest common ancestor). | 
| 2170 |                         */ | 
| 2171 |                         while (n > p) { | 
| 2172 |                             cap = eng->f.at(i: n).capture; | 
| 2173 |                             if (cap >= 0) { | 
| 2174 |                                 capBegin[cap] = i; | 
| 2175 |                                 capEnd[cap] = EmptyCapture; | 
| 2176 |                             } | 
| 2177 |                             n = eng->f.at(i: n).parent; | 
| 2178 |                         } | 
| 2179 |                         /* | 
| 2180 |                           If the next state was already in | 
| 2181 |                           nextStack, we must choose carefully which | 
| 2182 |                           capture zones we want to keep. | 
| 2183 |                         */ | 
| 2184 |                         if (capBegin == tempCapBegin && | 
| 2185 |                                 isBetterCapture(ncap, begin1: capBegin, end1: capEnd, begin2: nextCapBegin + m * ncap, | 
| 2186 |                                                 end2: nextCapEnd + m * ncap)) { | 
| 2187 |                             memcpy(dest: nextCapBegin + m * ncap, src: capBegin, n: ncap * sizeof(int)); | 
| 2188 |                             memcpy(dest: nextCapEnd + m * ncap, src: capEnd, n: ncap * sizeof(int)); | 
| 2189 |                         } | 
| 2190 |                     } | 
| 2191 | #ifndef QT_NO_REGEXP_BACKREF | 
| 2192 |                     /* | 
| 2193 |                       We are done with updating the capture zones. | 
| 2194 |                       It's now time to put the next state to sleep, | 
| 2195 |                       if it needs to, and to remove it from | 
| 2196 |                       nextStack. | 
| 2197 |                     */ | 
| 2198 |                     if (needSomeSleep > 0) { | 
| 2199 |                         QList<int> zzZ(2 + 2 * ncap); | 
| 2200 |                         zzZ[0] = i + needSomeSleep; | 
| 2201 |                         zzZ[1] = next; | 
| 2202 |                         if (ncap > 0) { | 
| 2203 |                             memcpy(dest: zzZ.data() + 2, src: capBegin, n: ncap * sizeof(int)); | 
| 2204 |                             memcpy(dest: zzZ.data() + 2 + ncap, src: capEnd, n: ncap * sizeof(int)); | 
| 2205 |                         } | 
| 2206 |                         inNextStack[nextStack[--nnext]] = -1; | 
| 2207 |                         sleeping.append(t: zzZ); | 
| 2208 |                     } | 
| 2209 | #endif | 
| 2210 | #endif | 
| 2211 |                 } | 
| 2212 |             } | 
| 2213 |         } | 
| 2214 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2215 |         /* | 
| 2216 |           If we reached the final state, hurray! Copy the captured | 
| 2217 |           zone. | 
| 2218 |         */ | 
| 2219 |         if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) { | 
| 2220 |             memcpy(dest: capBegin, src: nextCapBegin + m * ncap, n: ncap * sizeof(int)); | 
| 2221 |             memcpy(dest: capEnd, src: nextCapEnd + m * ncap, n: ncap * sizeof(int)); | 
| 2222 |         } | 
| 2223 | #ifndef QT_NO_REGEXP_BACKREF | 
| 2224 |         /* | 
| 2225 |           It's time to wake up the sleepers. | 
| 2226 |         */ | 
| 2227 |         j = 0; | 
| 2228 |         while (j < sleeping.size()) { | 
| 2229 |             if (sleeping.at(i: j)[0] == i) { | 
| 2230 |                 const QList<int> &zzZ = sleeping.at(i: j); | 
| 2231 |                 int next = zzZ[1]; | 
| 2232 |                 const int *capBegin = zzZ.data() + 2; | 
| 2233 |                 const int *capEnd = zzZ.data() + 2 + ncap; | 
| 2234 |                 bool copyOver = true; | 
| 2235 |  | 
| 2236 |                 if ((m = inNextStack[next]) == -1) { | 
| 2237 |                     m = nnext++; | 
| 2238 |                     nextStack[m] = next; | 
| 2239 |                     inNextStack[next] = m; | 
| 2240 |                 } else { | 
| 2241 |                     copyOver = isBetterCapture(ncap, begin1: nextCapBegin + m * ncap, end1: nextCapEnd + m * ncap, | 
| 2242 |                                                begin2: capBegin, end2: capEnd); | 
| 2243 |                 } | 
| 2244 |                 if (copyOver) { | 
| 2245 |                     memcpy(dest: nextCapBegin + m * ncap, src: capBegin, n: ncap * sizeof(int)); | 
| 2246 |                     memcpy(dest: nextCapEnd + m * ncap, src: capEnd, n: ncap * sizeof(int)); | 
| 2247 |                 } | 
| 2248 |  | 
| 2249 |                 sleeping.removeAt(i: j); | 
| 2250 |             } else { | 
| 2251 |                 ++j; | 
| 2252 |             } | 
| 2253 |         } | 
| 2254 | #endif | 
| 2255 | #endif | 
| 2256 |         for (j = 0; j < nnext; j++) | 
| 2257 |             inNextStack[nextStack[j]] = -1; | 
| 2258 |  | 
| 2259 |         // avoid needless iteration that confuses oneTestMatchedLen | 
| 2260 |         if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState | 
| 2261 | #ifndef QT_NO_REGEXP_BACKREF | 
| 2262 |              && sleeping.isEmpty() | 
| 2263 | #endif | 
| 2264 |            ) | 
| 2265 |             stop = true; | 
| 2266 |  | 
| 2267 |         qSwap(value1&: curStack, value2&: nextStack); | 
| 2268 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2269 |         qSwap(value1&: curCapBegin, value2&: nextCapBegin); | 
| 2270 |         qSwap(value1&: curCapEnd, value2&: nextCapEnd); | 
| 2271 | #endif | 
| 2272 |         ncur = nnext; | 
| 2273 |         nnext = 0; | 
| 2274 |         ++i; | 
| 2275 |     } | 
| 2276 |  | 
| 2277 | #ifndef QT_NO_REGEXP_BACKREF | 
| 2278 |     /* | 
| 2279 |       If minimal matching is enabled, we might have some sleepers | 
| 2280 |       left. | 
| 2281 |     */ | 
| 2282 |     if (!sleeping.isEmpty()) | 
| 2283 |         sleeping.clear(); | 
| 2284 | #endif | 
| 2285 |  | 
| 2286 |     oneTestMatchedLen = i - 1; | 
| 2287 |     return (matchLen >= 0); | 
| 2288 | } | 
| 2289 |  | 
| 2290 | #ifndef QT_NO_REGEXP_CCLASS | 
| 2291 |  | 
| 2292 | QRegExpCharClass::QRegExpCharClass() | 
| 2293 |     : c(0), n(false) | 
| 2294 | { | 
| 2295 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2296 |     occ1.fill(t: NoOccurrence, newSize: NumBadChars); | 
| 2297 | #endif | 
| 2298 | } | 
| 2299 |  | 
| 2300 | void QRegExpCharClass::clear() | 
| 2301 | { | 
| 2302 |     c = 0; | 
| 2303 |     r.clear(); | 
| 2304 |     n = false; | 
| 2305 | } | 
| 2306 |  | 
| 2307 | void QRegExpCharClass::setNegative(bool negative) | 
| 2308 | { | 
| 2309 |     n = negative; | 
| 2310 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2311 |     occ1.fill(t: 0, newSize: NumBadChars); | 
| 2312 | #endif | 
| 2313 | } | 
| 2314 |  | 
| 2315 | void QRegExpCharClass::addCategories(uint cats) | 
| 2316 | { | 
| 2317 |     static const int all_cats = FLAG(QChar::Mark_NonSpacing) | | 
| 2318 |                                 FLAG(QChar::Mark_SpacingCombining) | | 
| 2319 |                                 FLAG(QChar::Mark_Enclosing) | | 
| 2320 |                                 FLAG(QChar::Number_DecimalDigit) | | 
| 2321 |                                 FLAG(QChar::Number_Letter) | | 
| 2322 |                                 FLAG(QChar::Number_Other) | | 
| 2323 |                                 FLAG(QChar::Separator_Space) | | 
| 2324 |                                 FLAG(QChar::Separator_Line) | | 
| 2325 |                                 FLAG(QChar::Separator_Paragraph) | | 
| 2326 |                                 FLAG(QChar::Other_Control) | | 
| 2327 |                                 FLAG(QChar::Other_Format) | | 
| 2328 |                                 FLAG(QChar::Other_Surrogate) | | 
| 2329 |                                 FLAG(QChar::Other_PrivateUse) | | 
| 2330 |                                 FLAG(QChar::Other_NotAssigned) | | 
| 2331 |                                 FLAG(QChar::Letter_Uppercase) | | 
| 2332 |                                 FLAG(QChar::Letter_Lowercase) | | 
| 2333 |                                 FLAG(QChar::Letter_Titlecase) | | 
| 2334 |                                 FLAG(QChar::Letter_Modifier) | | 
| 2335 |                                 FLAG(QChar::Letter_Other) | | 
| 2336 |                                 FLAG(QChar::Punctuation_Connector) | | 
| 2337 |                                 FLAG(QChar::Punctuation_Dash) | | 
| 2338 |                                 FLAG(QChar::Punctuation_Open) | | 
| 2339 |                                 FLAG(QChar::Punctuation_Close) | | 
| 2340 |                                 FLAG(QChar::Punctuation_InitialQuote) | | 
| 2341 |                                 FLAG(QChar::Punctuation_FinalQuote) | | 
| 2342 |                                 FLAG(QChar::Punctuation_Other) | | 
| 2343 |                                 FLAG(QChar::Symbol_Math) | | 
| 2344 |                                 FLAG(QChar::Symbol_Currency) | | 
| 2345 |                                 FLAG(QChar::Symbol_Modifier) | | 
| 2346 |                                 FLAG(QChar::Symbol_Other); | 
| 2347 |     c |= (all_cats & cats); | 
| 2348 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2349 |     occ1.fill(t: 0, newSize: NumBadChars); | 
| 2350 | #endif | 
| 2351 | } | 
| 2352 |  | 
| 2353 | void QRegExpCharClass::addRange(ushort from, ushort to) | 
| 2354 | { | 
| 2355 |     if (from > to) | 
| 2356 |         qSwap(value1&: from, value2&: to); | 
| 2357 |     int m = r.size(); | 
| 2358 |     r.resize(size: m + 1); | 
| 2359 |     r[m].from = from; | 
| 2360 |     r[m].len = to - from + 1; | 
| 2361 |  | 
| 2362 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2363 |     int i; | 
| 2364 |  | 
| 2365 |     if (to - from < NumBadChars) { | 
| 2366 |         if (from % NumBadChars <= to % NumBadChars) { | 
| 2367 |             for (i = from % NumBadChars; i <= to % NumBadChars; i++) | 
| 2368 |                 occ1[i] = 0; | 
| 2369 |         } else { | 
| 2370 |             for (i = 0; i <= to % NumBadChars; i++) | 
| 2371 |                 occ1[i] = 0; | 
| 2372 |             for (i = from % NumBadChars; i < NumBadChars; i++) | 
| 2373 |                 occ1[i] = 0; | 
| 2374 |         } | 
| 2375 |     } else { | 
| 2376 |         occ1.fill(t: 0, newSize: NumBadChars); | 
| 2377 |     } | 
| 2378 | #endif | 
| 2379 | } | 
| 2380 |  | 
| 2381 | bool QRegExpCharClass::in(QChar ch) const | 
| 2382 | { | 
| 2383 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2384 |     if (occ1.at(BadChar(ch)) == NoOccurrence) | 
| 2385 |         return n; | 
| 2386 | #endif | 
| 2387 |  | 
| 2388 |     if (c != 0 && (c & FLAG(ch.category())) != 0) | 
| 2389 |         return !n; | 
| 2390 |  | 
| 2391 |     const int uc = ch.unicode(); | 
| 2392 |     int size = r.size(); | 
| 2393 |  | 
| 2394 |     for (int i = 0; i < size; ++i) { | 
| 2395 |         const QRegExpCharClassRange &range = r.at(i); | 
| 2396 |         if (uint(uc - range.from) < uint(r.at(i).len)) | 
| 2397 |             return !n; | 
| 2398 |     } | 
| 2399 |     return n; | 
| 2400 | } | 
| 2401 |  | 
| 2402 | #if defined(QT_DEBUG) | 
| 2403 | void QRegExpCharClass::dump() const | 
| 2404 | { | 
| 2405 |     int i; | 
| 2406 |     qDebug(msg: "    %stive character class" , n ? "nega"  : "posi" ); | 
| 2407 | #ifndef QT_NO_REGEXP_CCLASS | 
| 2408 |     if (c != 0) | 
| 2409 |         qDebug(msg: "      categories 0x%.8x" , c); | 
| 2410 | #endif | 
| 2411 |     for (i = 0; i < r.size(); i++) | 
| 2412 |         qDebug(msg: "      0x%.4x through 0x%.4x" , r[i].from, r[i].from + r[i].len - 1); | 
| 2413 | } | 
| 2414 | #endif | 
| 2415 | #endif | 
| 2416 |  | 
| 2417 | QRegExpEngine::Box::Box(QRegExpEngine *engine) | 
| 2418 |     : eng(engine), skipanchors(0) | 
| 2419 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2420 |       , earlyStart(0), lateStart(0), maxl(0) | 
| 2421 | #endif | 
| 2422 | { | 
| 2423 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2424 |     occ1.fill(t: NoOccurrence, newSize: NumBadChars); | 
| 2425 | #endif | 
| 2426 |     minl = 0; | 
| 2427 | } | 
| 2428 |  | 
| 2429 | QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b) | 
| 2430 | { | 
| 2431 |     eng = b.eng; | 
| 2432 |     ls = b.ls; | 
| 2433 |     rs = b.rs; | 
| 2434 |     lanchors = b.lanchors; | 
| 2435 |     ranchors = b.ranchors; | 
| 2436 |     skipanchors = b.skipanchors; | 
| 2437 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2438 |     earlyStart = b.earlyStart; | 
| 2439 |     lateStart = b.lateStart; | 
| 2440 |     str = b.str; | 
| 2441 |     leftStr = b.leftStr; | 
| 2442 |     rightStr = b.rightStr; | 
| 2443 |     maxl = b.maxl; | 
| 2444 |     occ1 = b.occ1; | 
| 2445 | #endif | 
| 2446 |     minl = b.minl; | 
| 2447 |     return *this; | 
| 2448 | } | 
| 2449 |  | 
| 2450 | void QRegExpEngine::Box::set(QChar ch) | 
| 2451 | { | 
| 2452 |     ls.resize(size: 1); | 
| 2453 |     ls[0] = eng->createState(ch); | 
| 2454 |     rs = ls; | 
| 2455 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2456 |     str = ch; | 
| 2457 |     leftStr = ch; | 
| 2458 |     rightStr = ch; | 
| 2459 |     maxl = 1; | 
| 2460 |     occ1[BadChar(ch)] = 0; | 
| 2461 | #endif | 
| 2462 |     minl = 1; | 
| 2463 | } | 
| 2464 |  | 
| 2465 | void QRegExpEngine::Box::set(const QRegExpCharClass &cc) | 
| 2466 | { | 
| 2467 |     ls.resize(size: 1); | 
| 2468 |     ls[0] = eng->createState(cc); | 
| 2469 |     rs = ls; | 
| 2470 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2471 |     maxl = 1; | 
| 2472 |     occ1 = cc.firstOccurrence(); | 
| 2473 | #endif | 
| 2474 |     minl = 1; | 
| 2475 | } | 
| 2476 |  | 
| 2477 | #ifndef QT_NO_REGEXP_BACKREF | 
| 2478 | void QRegExpEngine::Box::set(int bref) | 
| 2479 | { | 
| 2480 |     ls.resize(size: 1); | 
| 2481 |     ls[0] = eng->createState(bref); | 
| 2482 |     rs = ls; | 
| 2483 |     if (bref >= 1 && bref <= MaxBackRefs) | 
| 2484 |         skipanchors = Anchor_BackRef0Empty << bref; | 
| 2485 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2486 |     maxl = InftyLen; | 
| 2487 | #endif | 
| 2488 |     minl = 0; | 
| 2489 | } | 
| 2490 | #endif | 
| 2491 |  | 
| 2492 | void QRegExpEngine::Box::cat(const Box &b) | 
| 2493 | { | 
| 2494 |     eng->addCatTransitions(from: rs, to: b.ls); | 
| 2495 |     addAnchorsToEngine(to: b); | 
| 2496 |     if (minl == 0) { | 
| 2497 |         lanchors.insert(map: b.lanchors); | 
| 2498 |         if (skipanchors != 0) { | 
| 2499 |             for (int i = 0; i < b.ls.size(); i++) { | 
| 2500 |                 int a = eng->anchorConcatenation(a: lanchors.value(key: b.ls.at(i), defaultValue: 0), b: skipanchors); | 
| 2501 |                 lanchors.insert(key: b.ls.at(i), value: a); | 
| 2502 |             } | 
| 2503 |         } | 
| 2504 |         mergeInto(a: &ls, b: b.ls); | 
| 2505 |     } | 
| 2506 |     if (b.minl == 0) { | 
| 2507 |         ranchors.insert(map: b.ranchors); | 
| 2508 |         if (b.skipanchors != 0) { | 
| 2509 |             for (int i = 0; i < rs.size(); i++) { | 
| 2510 |                 int a = eng->anchorConcatenation(a: ranchors.value(key: rs.at(i), defaultValue: 0), b: b.skipanchors); | 
| 2511 |                 ranchors.insert(key: rs.at(i), value: a); | 
| 2512 |             } | 
| 2513 |         } | 
| 2514 |         mergeInto(a: &rs, b: b.rs); | 
| 2515 |     } else { | 
| 2516 |         ranchors = b.ranchors; | 
| 2517 |         rs = b.rs; | 
| 2518 |     } | 
| 2519 |  | 
| 2520 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2521 |     if (maxl != InftyLen) { | 
| 2522 |         if (rightStr.size() + b.leftStr.size() > | 
| 2523 |              qMax(a: str.size(), b: b.str.size())) { | 
| 2524 |             earlyStart = minl - rightStr.size(); | 
| 2525 |             lateStart = maxl - rightStr.size(); | 
| 2526 |             str = rightStr + b.leftStr; | 
| 2527 |         } else if (b.str.size() > str.size()) { | 
| 2528 |             earlyStart = minl + b.earlyStart; | 
| 2529 |             lateStart = maxl + b.lateStart; | 
| 2530 |             str = b.str; | 
| 2531 |         } | 
| 2532 |     } | 
| 2533 |  | 
| 2534 |     if (leftStr.size() == maxl) | 
| 2535 |         leftStr += b.leftStr; | 
| 2536 |  | 
| 2537 |     if (b.rightStr.size() == b.maxl) { | 
| 2538 |         rightStr += b.rightStr; | 
| 2539 |     } else { | 
| 2540 |         rightStr = b.rightStr; | 
| 2541 |     } | 
| 2542 |  | 
| 2543 |     if (maxl == InftyLen || b.maxl == InftyLen) { | 
| 2544 |         maxl = InftyLen; | 
| 2545 |     } else { | 
| 2546 |         maxl += b.maxl; | 
| 2547 |     } | 
| 2548 |  | 
| 2549 |     for (int i = 0; i < NumBadChars; i++) { | 
| 2550 |         if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i)) | 
| 2551 |             occ1[i] = minl + b.occ1.at(i); | 
| 2552 |     } | 
| 2553 | #endif | 
| 2554 |  | 
| 2555 |     minl += b.minl; | 
| 2556 |     if (minl == 0) | 
| 2557 |         skipanchors = eng->anchorConcatenation(a: skipanchors, b: b.skipanchors); | 
| 2558 |     else | 
| 2559 |         skipanchors = 0; | 
| 2560 | } | 
| 2561 |  | 
| 2562 | void QRegExpEngine::Box::orx(const Box &b) | 
| 2563 | { | 
| 2564 |     mergeInto(a: &ls, b: b.ls); | 
| 2565 |     lanchors.insert(map: b.lanchors); | 
| 2566 |     mergeInto(a: &rs, b: b.rs); | 
| 2567 |     ranchors.insert(map: b.ranchors); | 
| 2568 |  | 
| 2569 |     if (b.minl == 0) { | 
| 2570 |         if (minl == 0) | 
| 2571 |             skipanchors = eng->anchorAlternation(a: skipanchors, b: b.skipanchors); | 
| 2572 |         else | 
| 2573 |             skipanchors = b.skipanchors; | 
| 2574 |     } | 
| 2575 |  | 
| 2576 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2577 |     for (int i = 0; i < NumBadChars; i++) { | 
| 2578 |         if (occ1.at(i) > b.occ1.at(i)) | 
| 2579 |             occ1[i] = b.occ1.at(i); | 
| 2580 |     } | 
| 2581 |     earlyStart = 0; | 
| 2582 |     lateStart = 0; | 
| 2583 |     str = QString(); | 
| 2584 |     leftStr = QString(); | 
| 2585 |     rightStr = QString(); | 
| 2586 |     if (b.maxl > maxl) | 
| 2587 |         maxl = b.maxl; | 
| 2588 | #endif | 
| 2589 |     if (b.minl < minl) | 
| 2590 |         minl = b.minl; | 
| 2591 | } | 
| 2592 |  | 
| 2593 | void QRegExpEngine::Box::plus(int atom) | 
| 2594 | { | 
| 2595 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 2596 |     eng->addPlusTransitions(from: rs, to: ls, atom); | 
| 2597 | #else | 
| 2598 |     Q_UNUSED(atom); | 
| 2599 |     eng->addCatTransitions(rs, ls); | 
| 2600 | #endif | 
| 2601 |     addAnchorsToEngine(to: *this); | 
| 2602 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2603 |     maxl = InftyLen; | 
| 2604 | #endif | 
| 2605 | } | 
| 2606 |  | 
| 2607 | void QRegExpEngine::Box::opt() | 
| 2608 | { | 
| 2609 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2610 |     earlyStart = 0; | 
| 2611 |     lateStart = 0; | 
| 2612 |     str = QString(); | 
| 2613 |     leftStr = QString(); | 
| 2614 |     rightStr = QString(); | 
| 2615 | #endif | 
| 2616 |     skipanchors = 0; | 
| 2617 |     minl = 0; | 
| 2618 | } | 
| 2619 |  | 
| 2620 | void QRegExpEngine::Box::catAnchor(int a) | 
| 2621 | { | 
| 2622 |     if (a != 0) { | 
| 2623 |         for (int i = 0; i < rs.size(); i++) { | 
| 2624 |             a = eng->anchorConcatenation(a: ranchors.value(key: rs.at(i), defaultValue: 0), b: a); | 
| 2625 |             ranchors.insert(key: rs.at(i), value: a); | 
| 2626 |         } | 
| 2627 |         if (minl == 0) | 
| 2628 |             skipanchors = eng->anchorConcatenation(a: skipanchors, b: a); | 
| 2629 |     } | 
| 2630 | } | 
| 2631 |  | 
| 2632 | #ifndef QT_NO_REGEXP_OPTIM | 
| 2633 | void QRegExpEngine::Box::setupHeuristics() | 
| 2634 | { | 
| 2635 |     eng->goodEarlyStart = earlyStart; | 
| 2636 |     eng->goodLateStart = lateStart; | 
| 2637 |     eng->goodStr = eng->cs ? str : str.toLower(); | 
| 2638 |  | 
| 2639 |     eng->minl = minl; | 
| 2640 |     if (eng->cs) { | 
| 2641 |         /* | 
| 2642 |           A regular expression such as 112|1 has occ1['2'] = 2 and minl = | 
| 2643 |           1 at this point. An entry of occ1 has to be at most minl or | 
| 2644 |           infinity for the rest of the algorithm to go well. | 
| 2645 |  | 
| 2646 |           We waited until here before normalizing these cases (instead of | 
| 2647 |           doing it in Box::orx()) because sometimes things improve by | 
| 2648 |           themselves. Consider for example (112|1)34. | 
| 2649 |         */ | 
| 2650 |         for (int i = 0; i < NumBadChars; i++) { | 
| 2651 |             if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl) | 
| 2652 |                 occ1[i] = minl; | 
| 2653 |         } | 
| 2654 |         eng->occ1 = occ1; | 
| 2655 |     } else { | 
| 2656 |         eng->occ1.fill(t: 0, newSize: NumBadChars); | 
| 2657 |     } | 
| 2658 |  | 
| 2659 |     eng->heuristicallyChooseHeuristic(); | 
| 2660 | } | 
| 2661 | #endif | 
| 2662 |  | 
| 2663 | #if defined(QT_DEBUG) | 
| 2664 | void QRegExpEngine::Box::dump() const | 
| 2665 | { | 
| 2666 |     int i; | 
| 2667 |     qDebug(msg: "Box of at least %d character%s" , minl, minl == 1 ? ""  : "s" ); | 
| 2668 |     qDebug(msg: "  Left states:" ); | 
| 2669 |     for (i = 0; i < ls.size(); i++) { | 
| 2670 |         if (lanchors.value(key: ls[i], defaultValue: 0) == 0) | 
| 2671 |             qDebug(msg: "    %d" , ls[i]); | 
| 2672 |         else | 
| 2673 |             qDebug(msg: "    %d [anchors 0x%.8x]" , ls[i], lanchors[ls[i]]); | 
| 2674 |     } | 
| 2675 |     qDebug(msg: "  Right states:" ); | 
| 2676 |     for (i = 0; i < rs.size(); i++) { | 
| 2677 |         if (ranchors.value(key: rs[i], defaultValue: 0) == 0) | 
| 2678 |             qDebug(msg: "    %d" , rs[i]); | 
| 2679 |         else | 
| 2680 |             qDebug(msg: "    %d [anchors 0x%.8x]" , rs[i], ranchors[rs[i]]); | 
| 2681 |     } | 
| 2682 |     qDebug(msg: "  Skip anchors: 0x%.8x" , skipanchors); | 
| 2683 | } | 
| 2684 | #endif | 
| 2685 |  | 
| 2686 | void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const | 
| 2687 | { | 
| 2688 |     for (int i = 0; i < to.ls.size(); i++) { | 
| 2689 |         for (int j = 0; j < rs.size(); j++) { | 
| 2690 |             int a = eng->anchorConcatenation(a: ranchors.value(key: rs.at(i: j), defaultValue: 0), | 
| 2691 |                                              b: to.lanchors.value(key: to.ls.at(i), defaultValue: 0)); | 
| 2692 |             eng->addAnchors(from: rs[j], to: to.ls[i], a); | 
| 2693 |         } | 
| 2694 |     } | 
| 2695 | } | 
| 2696 |  | 
| 2697 | #ifndef QT_NO_REGEXP_CCLASS | 
// Lookup table for the XML Schema extensions: maps a name to a code point range.
// Entries are sorted by name so that the table can be binary-searched.
| 2700 | static const struct CategoriesRangeMapEntry { | 
| 2701 |     const char name[40]; | 
| 2702 |     uint first, second; | 
| 2703 | } categoriesRangeMap[] = { | 
| 2704 |     { .name: "AegeanNumbers" ,                        .first: 0x10100, .second: 0x1013F }, | 
| 2705 |     { .name: "AlphabeticPresentationForms" ,          .first: 0xFB00, .second: 0xFB4F }, | 
| 2706 |     { .name: "AncientGreekMusicalNotation" ,          .first: 0x1D200, .second: 0x1D24F }, | 
| 2707 |     { .name: "AncientGreekNumbers" ,                  .first: 0x10140, .second: 0x1018F }, | 
| 2708 |     { .name: "Arabic" ,                               .first: 0x0600, .second: 0x06FF }, | 
| 2709 |     { .name: "ArabicPresentationForms-A" ,            .first: 0xFB50, .second: 0xFDFF }, | 
| 2710 |     { .name: "ArabicPresentationForms-B" ,            .first: 0xFE70, .second: 0xFEFF }, | 
| 2711 |     { .name: "ArabicSupplement" ,                     .first: 0x0750, .second: 0x077F }, | 
| 2712 |     { .name: "Armenian" ,                             .first: 0x0530, .second: 0x058F }, | 
| 2713 |     { .name: "Arrows" ,                               .first: 0x2190, .second: 0x21FF }, | 
| 2714 |     { .name: "BasicLatin" ,                           .first: 0x0000, .second: 0x007F }, | 
| 2715 |     { .name: "Bengali" ,                              .first: 0x0980, .second: 0x09FF }, | 
| 2716 |     { .name: "BlockElements" ,                        .first: 0x2580, .second: 0x259F }, | 
| 2717 |     { .name: "Bopomofo" ,                             .first: 0x3100, .second: 0x312F }, | 
| 2718 |     { .name: "BopomofoExtended" ,                     .first: 0x31A0, .second: 0x31BF }, | 
| 2719 |     { .name: "BoxDrawing" ,                           .first: 0x2500, .second: 0x257F }, | 
| 2720 |     { .name: "BraillePatterns" ,                      .first: 0x2800, .second: 0x28FF }, | 
| 2721 |     { .name: "Buginese" ,                             .first: 0x1A00, .second: 0x1A1F }, | 
| 2722 |     { .name: "Buhid" ,                                .first: 0x1740, .second: 0x175F }, | 
| 2723 |     { .name: "ByzantineMusicalSymbols" ,              .first: 0x1D000, .second: 0x1D0FF }, | 
| 2724 |     { .name: "CJKCompatibility" ,                     .first: 0x3300, .second: 0x33FF }, | 
| 2725 |     { .name: "CJKCompatibilityForms" ,                .first: 0xFE30, .second: 0xFE4F }, | 
| 2726 |     { .name: "CJKCompatibilityIdeographs" ,           .first: 0xF900, .second: 0xFAFF }, | 
| 2727 |     { .name: "CJKCompatibilityIdeographsSupplement" , .first: 0x2F800, .second: 0x2FA1F }, | 
| 2728 |     { .name: "CJKRadicalsSupplement" ,                .first: 0x2E80, .second: 0x2EFF }, | 
| 2729 |     { .name: "CJKStrokes" ,                           .first: 0x31C0, .second: 0x31EF }, | 
| 2730 |     { .name: "CJKSymbolsandPunctuation" ,             .first: 0x3000, .second: 0x303F }, | 
| 2731 |     { .name: "CJKUnifiedIdeographs" ,                 .first: 0x4E00, .second: 0x9FFF }, | 
| 2732 |     { .name: "CJKUnifiedIdeographsExtensionA" ,       .first: 0x3400, .second: 0x4DB5 }, | 
| 2733 |     { .name: "CJKUnifiedIdeographsExtensionB" ,       .first: 0x20000, .second: 0x2A6DF }, | 
| 2734 |     { .name: "Cherokee" ,                             .first: 0x13A0, .second: 0x13FF }, | 
| 2735 |     { .name: "CombiningDiacriticalMarks" ,            .first: 0x0300, .second: 0x036F }, | 
| 2736 |     { .name: "CombiningDiacriticalMarksSupplement" ,  .first: 0x1DC0, .second: 0x1DFF }, | 
| 2737 |     { .name: "CombiningHalfMarks" ,                   .first: 0xFE20, .second: 0xFE2F }, | 
| 2738 |     { .name: "CombiningMarksforSymbols" ,             .first: 0x20D0, .second: 0x20FF }, | 
| 2739 |     { .name: "ControlPictures" ,                      .first: 0x2400, .second: 0x243F }, | 
| 2740 |     { .name: "Coptic" ,                               .first: 0x2C80, .second: 0x2CFF }, | 
| 2741 |     { .name: "CurrencySymbols" ,                      .first: 0x20A0, .second: 0x20CF }, | 
| 2742 |     { .name: "CypriotSyllabary" ,                     .first: 0x10800, .second: 0x1083F }, | 
| 2743 |     { .name: "Cyrillic" ,                             .first: 0x0400, .second: 0x04FF }, | 
| 2744 |     { .name: "CyrillicSupplement" ,                   .first: 0x0500, .second: 0x052F }, | 
| 2745 |     { .name: "Deseret" ,                              .first: 0x10400, .second: 0x1044F }, | 
| 2746 |     { .name: "Devanagari" ,                           .first: 0x0900, .second: 0x097F }, | 
| 2747 |     { .name: "Dingbats" ,                             .first: 0x2700, .second: 0x27BF }, | 
| 2748 |     { .name: "EnclosedAlphanumerics" ,                .first: 0x2460, .second: 0x24FF }, | 
| 2749 |     { .name: "EnclosedCJKLettersandMonths" ,          .first: 0x3200, .second: 0x32FF }, | 
| 2750 |     { .name: "Ethiopic" ,                             .first: 0x1200, .second: 0x137F }, | 
| 2751 |     { .name: "EthiopicExtended" ,                     .first: 0x2D80, .second: 0x2DDF }, | 
| 2752 |     { .name: "EthiopicSupplement" ,                   .first: 0x1380, .second: 0x139F }, | 
| 2753 |     { .name: "GeneralPunctuation" ,                   .first: 0x2000, .second: 0x206F }, | 
| 2754 |     { .name: "GeometricShapes" ,                      .first: 0x25A0, .second: 0x25FF }, | 
| 2755 |     { .name: "Georgian" ,                             .first: 0x10A0, .second: 0x10FF }, | 
| 2756 |     { .name: "GeorgianSupplement" ,                   .first: 0x2D00, .second: 0x2D2F }, | 
| 2757 |     { .name: "Glagolitic" ,                           .first: 0x2C00, .second: 0x2C5F }, | 
| 2758 |     { .name: "Gothic" ,                               .first: 0x10330, .second: 0x1034F }, | 
| 2759 |     { .name: "Greek" ,                                .first: 0x0370, .second: 0x03FF }, | 
| 2760 |     { .name: "GreekExtended" ,                        .first: 0x1F00, .second: 0x1FFF }, | 
| 2761 |     { .name: "Gujarati" ,                             .first: 0x0A80, .second: 0x0AFF }, | 
| 2762 |     { .name: "Gurmukhi" ,                             .first: 0x0A00, .second: 0x0A7F }, | 
| 2763 |     { .name: "HalfwidthandFullwidthForms" ,           .first: 0xFF00, .second: 0xFFEF }, | 
| 2764 |     { .name: "HangulCompatibilityJamo" ,              .first: 0x3130, .second: 0x318F }, | 
| 2765 |     { .name: "HangulJamo" ,                           .first: 0x1100, .second: 0x11FF }, | 
| 2766 |     { .name: "HangulSyllables" ,                      .first: 0xAC00, .second: 0xD7A3 }, | 
| 2767 |     { .name: "Hanunoo" ,                              .first: 0x1720, .second: 0x173F }, | 
| 2768 |     { .name: "Hebrew" ,                               .first: 0x0590, .second: 0x05FF }, | 
| 2769 |     { .name: "Hiragana" ,                             .first: 0x3040, .second: 0x309F }, | 
| 2770 |     { .name: "IPAExtensions" ,                        .first: 0x0250, .second: 0x02AF }, | 
| 2771 |     { .name: "IdeographicDescriptionCharacters" ,     .first: 0x2FF0, .second: 0x2FFF }, | 
| 2772 |     { .name: "Kanbun" ,                               .first: 0x3190, .second: 0x319F }, | 
| 2773 |     { .name: "KangxiRadicals" ,                       .first: 0x2F00, .second: 0x2FDF }, | 
| 2774 |     { .name: "Kannada" ,                              .first: 0x0C80, .second: 0x0CFF }, | 
| 2775 |     { .name: "Katakana" ,                             .first: 0x30A0, .second: 0x30FF }, | 
| 2776 |     { .name: "KatakanaPhoneticExtensions" ,           .first: 0x31F0, .second: 0x31FF }, | 
| 2777 |     { .name: "Kharoshthi" ,                           .first: 0x10A00, .second: 0x10A5F }, | 
| 2778 |     { .name: "Khmer" ,                                .first: 0x1780, .second: 0x17FF }, | 
| 2779 |     { .name: "KhmerSymbols" ,                         .first: 0x19E0, .second: 0x19FF }, | 
| 2780 |     { .name: "Lao" ,                                  .first: 0x0E80, .second: 0x0EFF }, | 
| 2781 |     { .name: "Latin-1Supplement" ,                    .first: 0x0080, .second: 0x00FF }, | 
| 2782 |     { .name: "LatinExtended-A" ,                      .first: 0x0100, .second: 0x017F }, | 
| 2783 |     { .name: "LatinExtended-B" ,                      .first: 0x0180, .second: 0x024F }, | 
| 2784 |     { .name: "LatinExtendedAdditional" ,              .first: 0x1E00, .second: 0x1EFF }, | 
| 2785 |     { .name: "LetterlikeSymbols" ,                    .first: 0x2100, .second: 0x214F }, | 
| 2786 |     { .name: "Limbu" ,                                .first: 0x1900, .second: 0x194F }, | 
| 2787 |     { .name: "LinearBIdeograms" ,                     .first: 0x10080, .second: 0x100FF }, | 
| 2788 |     { .name: "LinearBSyllabary" ,                     .first: 0x10000, .second: 0x1007F }, | 
| 2789 |     { .name: "Malayalam" ,                            .first: 0x0D00, .second: 0x0D7F }, | 
| 2790 |     { .name: "MathematicalAlphanumericSymbols" ,      .first: 0x1D400, .second: 0x1D7FF }, | 
| 2791 |     { .name: "MathematicalOperators" ,                .first: 0x2200, .second: 0x22FF }, | 
| 2792 |     { .name: "MiscellaneousMathematicalSymbols-A" ,   .first: 0x27C0, .second: 0x27EF }, | 
| 2793 |     { .name: "MiscellaneousMathematicalSymbols-B" ,   .first: 0x2980, .second: 0x29FF }, | 
| 2794 |     { .name: "MiscellaneousSymbols" ,                 .first: 0x2600, .second: 0x26FF }, | 
| 2795 |     { .name: "MiscellaneousSymbolsandArrows" ,        .first: 0x2B00, .second: 0x2BFF }, | 
| 2796 |     { .name: "MiscellaneousTechnical" ,               .first: 0x2300, .second: 0x23FF }, | 
| 2797 |     { .name: "ModifierToneLetters" ,                  .first: 0xA700, .second: 0xA71F }, | 
| 2798 |     { .name: "Mongolian" ,                            .first: 0x1800, .second: 0x18AF }, | 
| 2799 |     { .name: "MusicalSymbols" ,                       .first: 0x1D100, .second: 0x1D1FF }, | 
| 2800 |     { .name: "Myanmar" ,                              .first: 0x1000, .second: 0x109F }, | 
| 2801 |     { .name: "NewTaiLue" ,                            .first: 0x1980, .second: 0x19DF }, | 
| 2802 |     { .name: "NumberForms" ,                          .first: 0x2150, .second: 0x218F }, | 
| 2803 |     { .name: "Ogham" ,                                .first: 0x1680, .second: 0x169F }, | 
| 2804 |     { .name: "OldItalic" ,                            .first: 0x10300, .second: 0x1032F }, | 
| 2805 |     { .name: "OldPersian" ,                           .first: 0x103A0, .second: 0x103DF }, | 
| 2806 |     { .name: "OpticalCharacterRecognition" ,          .first: 0x2440, .second: 0x245F }, | 
| 2807 |     { .name: "Oriya" ,                                .first: 0x0B00, .second: 0x0B7F }, | 
| 2808 |     { .name: "Osmanya" ,                              .first: 0x10480, .second: 0x104AF }, | 
| 2809 |     { .name: "PhoneticExtensions" ,                   .first: 0x1D00, .second: 0x1D7F }, | 
| 2810 |     { .name: "PhoneticExtensionsSupplement" ,         .first: 0x1D80, .second: 0x1DBF }, | 
| 2811 |     { .name: "PrivateUse" ,                           .first: 0xE000, .second: 0xF8FF }, | 
| 2812 |     { .name: "Runic" ,                                .first: 0x16A0, .second: 0x16FF }, | 
| 2813 |     { .name: "Shavian" ,                              .first: 0x10450, .second: 0x1047F }, | 
| 2814 |     { .name: "Sinhala" ,                              .first: 0x0D80, .second: 0x0DFF }, | 
| 2815 |     { .name: "SmallFormVariants" ,                    .first: 0xFE50, .second: 0xFE6F }, | 
| 2816 |     { .name: "SpacingModifierLetters" ,               .first: 0x02B0, .second: 0x02FF }, | 
| 2817 |     { .name: "Specials" ,                             .first: 0xFFF0, .second: 0xFFFF }, | 
| 2818 |     { .name: "SuperscriptsandSubscripts" ,            .first: 0x2070, .second: 0x209F }, | 
| 2819 |     { .name: "SupplementalArrows-A" ,                 .first: 0x27F0, .second: 0x27FF }, | 
| 2820 |     { .name: "SupplementalArrows-B" ,                 .first: 0x2900, .second: 0x297F }, | 
| 2821 |     { .name: "SupplementalMathematicalOperators" ,    .first: 0x2A00, .second: 0x2AFF }, | 
| 2822 |     { .name: "SupplementalPunctuation" ,              .first: 0x2E00, .second: 0x2E7F }, | 
| 2823 |     { .name: "SupplementaryPrivateUseArea-A" ,        .first: 0xF0000, .second: 0xFFFFF }, | 
| 2824 |     { .name: "SupplementaryPrivateUseArea-B" ,        .first: 0x100000, .second: 0x10FFFF }, | 
| 2825 |     { .name: "SylotiNagri" ,                          .first: 0xA800, .second: 0xA82F }, | 
| 2826 |     { .name: "Syriac" ,                               .first: 0x0700, .second: 0x074F }, | 
| 2827 |     { .name: "Tagalog" ,                              .first: 0x1700, .second: 0x171F }, | 
| 2828 |     { .name: "Tagbanwa" ,                             .first: 0x1760, .second: 0x177F }, | 
| 2829 |     { .name: "Tags" ,                                 .first: 0xE0000, .second: 0xE007F }, | 
| 2830 |     { .name: "TaiLe" ,                                .first: 0x1950, .second: 0x197F }, | 
| 2831 |     { .name: "TaiXuanJingSymbols" ,                   .first: 0x1D300, .second: 0x1D35F }, | 
| 2832 |     { .name: "Tamil" ,                                .first: 0x0B80, .second: 0x0BFF }, | 
| 2833 |     { .name: "Telugu" ,                               .first: 0x0C00, .second: 0x0C7F }, | 
| 2834 |     { .name: "Thaana" ,                               .first: 0x0780, .second: 0x07BF }, | 
| 2835 |     { .name: "Thai" ,                                 .first: 0x0E00, .second: 0x0E7F }, | 
| 2836 |     { .name: "Tibetan" ,                              .first: 0x0F00, .second: 0x0FFF }, | 
| 2837 |     { .name: "Tifinagh" ,                             .first: 0x2D30, .second: 0x2D7F }, | 
| 2838 |     { .name: "Ugaritic" ,                             .first: 0x10380, .second: 0x1039F }, | 
| 2839 |     { .name: "UnifiedCanadianAboriginalSyllabics" ,   .first: 0x1400, .second: 0x167F }, | 
| 2840 |     { .name: "VariationSelectors" ,                   .first: 0xFE00, .second: 0xFE0F }, | 
| 2841 |     { .name: "VariationSelectorsSupplement" ,         .first: 0xE0100, .second: 0xE01EF }, | 
| 2842 |     { .name: "VerticalForms" ,                        .first: 0xFE10, .second: 0xFE1F }, | 
| 2843 |     { .name: "YiRadicals" ,                           .first: 0xA490, .second: 0xA4CF }, | 
| 2844 |     { .name: "YiSyllables" ,                          .first: 0xA000, .second: 0xA48F }, | 
| 2845 |     { .name: "YijingHexagramSymbols" ,                .first: 0x4DC0, .second: 0x4DFF } | 
| 2846 | }; | 
| 2847 |  | 
| 2848 | inline bool operator<(const CategoriesRangeMapEntry &entry1, const CategoriesRangeMapEntry &entry2) | 
| 2849 | { return qstrcmp(str1: entry1.name, str2: entry2.name) < 0; } | 
| 2850 | inline bool operator<(const char *name, const CategoriesRangeMapEntry &entry) | 
| 2851 | { return qstrcmp(str1: name, str2: entry.name) < 0; } | 
| 2852 | inline bool operator<(const CategoriesRangeMapEntry &entry, const char *name) | 
| 2853 | { return qstrcmp(str1: entry.name, str2: name) < 0; } | 
| 2854 | #endif // QT_NO_REGEXP_CCLASS | 
| 2855 |  | 
| 2856 | int QRegExpEngine::getChar() | 
| 2857 | { | 
| 2858 |     return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode(); | 
| 2859 | } | 
| 2860 |  | 
| 2861 | int QRegExpEngine::getEscape() | 
| 2862 | { | 
| 2863 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 2864 |     const char tab[] = "afnrtv" ; // no b, as \b means word boundary | 
| 2865 |     const char backTab[] = "\a\f\n\r\t\v" ; | 
| 2866 |     ushort low; | 
| 2867 |     int i; | 
| 2868 | #endif | 
| 2869 |     ushort val; | 
| 2870 |     int prevCh = yyCh; | 
| 2871 |  | 
| 2872 |     if (prevCh == EOS) { | 
| 2873 |         error(RXERR_END); | 
| 2874 |         return Tok_Char | '\\'; | 
| 2875 |     } | 
| 2876 |     yyCh = getChar(); | 
| 2877 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 2878 |     if ((prevCh & ~0xff) == 0) { | 
| 2879 |         const char *p = strchr(s: tab, c: prevCh); | 
| 2880 |         if (p != nullptr) | 
| 2881 |             return Tok_Char | backTab[p - tab]; | 
| 2882 |     } | 
| 2883 | #endif | 
| 2884 |  | 
| 2885 |     switch (prevCh) { | 
| 2886 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 2887 |     case '0': | 
| 2888 |         val = 0; | 
| 2889 |         for (i = 0; i < 3; i++) { | 
| 2890 |             if (yyCh >= '0' && yyCh <= '7') | 
| 2891 |                 val = (val << 3) | (yyCh - '0'); | 
| 2892 |             else | 
| 2893 |                 break; | 
| 2894 |             yyCh = getChar(); | 
| 2895 |         } | 
| 2896 |         if ((val & ~0377) != 0) | 
| 2897 |             error(RXERR_OCTAL); | 
| 2898 |         return Tok_Char | val; | 
| 2899 | #endif | 
| 2900 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 2901 |     case 'B': | 
| 2902 |         return Tok_NonWord; | 
| 2903 | #endif | 
| 2904 | #ifndef QT_NO_REGEXP_CCLASS | 
| 2905 |     case 'D': | 
| 2906 |         // see QChar::isDigit() | 
| 2907 |         yyCharClass->addCategories(cats: uint(-1) ^ FLAG(QChar::Number_DecimalDigit)); | 
| 2908 |         return Tok_CharClass; | 
| 2909 |     case 'S': | 
| 2910 |         // see QChar::isSpace() | 
| 2911 |         yyCharClass->addCategories(cats: uint(-1) ^ (FLAG(QChar::Separator_Space) | | 
| 2912 |                                                FLAG(QChar::Separator_Line) | | 
| 2913 |                                                FLAG(QChar::Separator_Paragraph) | | 
| 2914 |                                                FLAG(QChar::Other_Control))); | 
| 2915 |         yyCharClass->addRange(from: 0x0000, to: 0x0008); | 
| 2916 |         yyCharClass->addRange(from: 0x000e, to: 0x001f); | 
| 2917 |         yyCharClass->addRange(from: 0x007f, to: 0x0084); | 
| 2918 |         yyCharClass->addRange(from: 0x0086, to: 0x009f); | 
| 2919 |         return Tok_CharClass; | 
| 2920 |     case 'W': | 
| 2921 |         // see QChar::isLetterOrNumber() and QChar::isMark() | 
| 2922 |         yyCharClass->addCategories(cats: uint(-1) ^ (FLAG(QChar::Mark_NonSpacing) | | 
| 2923 |                                                FLAG(QChar::Mark_SpacingCombining) | | 
| 2924 |                                                FLAG(QChar::Mark_Enclosing) | | 
| 2925 |                                                FLAG(QChar::Number_DecimalDigit) | | 
| 2926 |                                                FLAG(QChar::Number_Letter) | | 
| 2927 |                                                FLAG(QChar::Number_Other) | | 
| 2928 |                                                FLAG(QChar::Letter_Uppercase) | | 
| 2929 |                                                FLAG(QChar::Letter_Lowercase) | | 
| 2930 |                                                FLAG(QChar::Letter_Titlecase) | | 
| 2931 |                                                FLAG(QChar::Letter_Modifier) | | 
| 2932 |                                                FLAG(QChar::Letter_Other) | | 
| 2933 |                                                FLAG(QChar::Punctuation_Connector))); | 
| 2934 |         yyCharClass->addRange(from: 0x203f, to: 0x2040); | 
| 2935 |         yyCharClass->addSingleton(ch: 0x2040); | 
| 2936 |         yyCharClass->addSingleton(ch: 0x2054); | 
| 2937 |         yyCharClass->addSingleton(ch: 0x30fb); | 
| 2938 |         yyCharClass->addRange(from: 0xfe33, to: 0xfe34); | 
| 2939 |         yyCharClass->addRange(from: 0xfe4d, to: 0xfe4f); | 
| 2940 |         yyCharClass->addSingleton(ch: 0xff3f); | 
| 2941 |         yyCharClass->addSingleton(ch: 0xff65); | 
| 2942 |         return Tok_CharClass; | 
| 2943 | #endif | 
| 2944 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 2945 |     case 'b': | 
| 2946 |         return Tok_Word; | 
| 2947 | #endif | 
| 2948 | #ifndef QT_NO_REGEXP_CCLASS | 
| 2949 |     case 'd': | 
| 2950 |         // see QChar::isDigit() | 
| 2951 |         yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); | 
| 2952 |         return Tok_CharClass; | 
| 2953 |     case 's': | 
| 2954 |         // see QChar::isSpace() | 
| 2955 |         yyCharClass->addCategories(FLAG(QChar::Separator_Space) | | 
| 2956 |                                    FLAG(QChar::Separator_Line) | | 
| 2957 |                                    FLAG(QChar::Separator_Paragraph)); | 
| 2958 |         yyCharClass->addRange(from: 0x0009, to: 0x000d); | 
| 2959 |         yyCharClass->addSingleton(ch: 0x0085); | 
| 2960 |         return Tok_CharClass; | 
| 2961 |     case 'w': | 
| 2962 |         // see QChar::isLetterOrNumber() and QChar::isMark() | 
| 2963 |         yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | 
| 2964 |                                    FLAG(QChar::Mark_SpacingCombining) | | 
| 2965 |                                    FLAG(QChar::Mark_Enclosing) | | 
| 2966 |                                    FLAG(QChar::Number_DecimalDigit) | | 
| 2967 |                                    FLAG(QChar::Number_Letter) | | 
| 2968 |                                    FLAG(QChar::Number_Other) | | 
| 2969 |                                    FLAG(QChar::Letter_Uppercase) | | 
| 2970 |                                    FLAG(QChar::Letter_Lowercase) | | 
| 2971 |                                    FLAG(QChar::Letter_Titlecase) | | 
| 2972 |                                    FLAG(QChar::Letter_Modifier) | | 
| 2973 |                                    FLAG(QChar::Letter_Other)); | 
| 2974 |         yyCharClass->addSingleton(ch: 0x005f); // '_' | 
| 2975 |         return Tok_CharClass; | 
| 2976 |     case 'I': | 
| 2977 |         if (!xmlSchemaExtensions) | 
| 2978 |             break; | 
| 2979 |         yyCharClass->setNegative(!yyCharClass->negative()); | 
| 2980 |         Q_FALLTHROUGH(); | 
| 2981 |     case 'i': | 
| 2982 |         if (xmlSchemaExtensions) { | 
| 2983 |             yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | 
| 2984 |                                        FLAG(QChar::Mark_SpacingCombining) | | 
| 2985 |                                        FLAG(QChar::Mark_Enclosing) | | 
| 2986 |                                        FLAG(QChar::Number_DecimalDigit) | | 
| 2987 |                                        FLAG(QChar::Number_Letter) | | 
| 2988 |                                        FLAG(QChar::Number_Other) | | 
| 2989 |                                        FLAG(QChar::Letter_Uppercase) | | 
| 2990 |                                        FLAG(QChar::Letter_Lowercase) | | 
| 2991 |                                        FLAG(QChar::Letter_Titlecase) | | 
| 2992 |                                        FLAG(QChar::Letter_Modifier) | | 
| 2993 |                                        FLAG(QChar::Letter_Other)); | 
| 2994 |             yyCharClass->addSingleton(ch: 0x003a); // ':' | 
| 2995 |             yyCharClass->addSingleton(ch: 0x005f); // '_' | 
| 2996 |             yyCharClass->addRange(from: 0x0041, to: 0x005a); // [A-Z] | 
| 2997 |             yyCharClass->addRange(from: 0x0061, to: 0x007a); // [a-z] | 
| 2998 |             yyCharClass->addRange(from: 0xc0, to: 0xd6); | 
| 2999 |             yyCharClass->addRange(from: 0xd8, to: 0xf6); | 
| 3000 |             yyCharClass->addRange(from: 0xf8, to: 0x2ff); | 
| 3001 |             yyCharClass->addRange(from: 0x370, to: 0x37d); | 
| 3002 |             yyCharClass->addRange(from: 0x37f, to: 0x1fff); | 
| 3003 |             yyCharClass->addRange(from: 0x200c, to: 0x200d); | 
| 3004 |             yyCharClass->addRange(from: 0x2070, to: 0x218f); | 
| 3005 |             yyCharClass->addRange(from: 0x2c00, to: 0x2fef); | 
| 3006 |             yyCharClass->addRange(from: 0x3001, to: 0xd7ff); | 
| 3007 |             yyCharClass->addRange(from: 0xf900, to: 0xfdcf); | 
| 3008 |             yyCharClass->addRange(from: 0xfdf0, to: 0xfffd); | 
| 3009 |             yyCharClass->addRange(from: (ushort)0x10000, to: (ushort)0xeffff); | 
| 3010 |             return Tok_CharClass; | 
| 3011 |         } else { | 
| 3012 |             break; | 
| 3013 |         } | 
| 3014 |     case 'C': | 
| 3015 |         if (!xmlSchemaExtensions) | 
| 3016 |             break; | 
| 3017 |         yyCharClass->setNegative(!yyCharClass->negative()); | 
| 3018 |         Q_FALLTHROUGH(); | 
| 3019 |     case 'c': | 
| 3020 |         if (xmlSchemaExtensions) { | 
| 3021 |             yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | 
| 3022 |                                        FLAG(QChar::Mark_SpacingCombining) | | 
| 3023 |                                        FLAG(QChar::Mark_Enclosing) | | 
| 3024 |                                        FLAG(QChar::Number_DecimalDigit) | | 
| 3025 |                                        FLAG(QChar::Number_Letter) | | 
| 3026 |                                        FLAG(QChar::Number_Other) | | 
| 3027 |                                        FLAG(QChar::Letter_Uppercase) | | 
| 3028 |                                        FLAG(QChar::Letter_Lowercase) | | 
| 3029 |                                        FLAG(QChar::Letter_Titlecase) | | 
| 3030 |                                        FLAG(QChar::Letter_Modifier) | | 
| 3031 |                                        FLAG(QChar::Letter_Other)); | 
| 3032 |             yyCharClass->addSingleton(ch: 0x002d); // '-' | 
| 3033 |             yyCharClass->addSingleton(ch: 0x002e); // '.' | 
| 3034 |             yyCharClass->addSingleton(ch: 0x003a); // ':' | 
| 3035 |             yyCharClass->addSingleton(ch: 0x005f); // '_' | 
| 3036 |             yyCharClass->addSingleton(ch: 0xb7); | 
| 3037 |             yyCharClass->addRange(from: 0x0030, to: 0x0039); // [0-9] | 
| 3038 |             yyCharClass->addRange(from: 0x0041, to: 0x005a); // [A-Z] | 
| 3039 |             yyCharClass->addRange(from: 0x0061, to: 0x007a); // [a-z] | 
| 3040 |             yyCharClass->addRange(from: 0xc0, to: 0xd6); | 
| 3041 |             yyCharClass->addRange(from: 0xd8, to: 0xf6); | 
| 3042 |             yyCharClass->addRange(from: 0xf8, to: 0x2ff); | 
| 3043 |             yyCharClass->addRange(from: 0x370, to: 0x37d); | 
| 3044 |             yyCharClass->addRange(from: 0x37f, to: 0x1fff); | 
| 3045 |             yyCharClass->addRange(from: 0x200c, to: 0x200d); | 
| 3046 |             yyCharClass->addRange(from: 0x2070, to: 0x218f); | 
| 3047 |             yyCharClass->addRange(from: 0x2c00, to: 0x2fef); | 
| 3048 |             yyCharClass->addRange(from: 0x3001, to: 0xd7ff); | 
| 3049 |             yyCharClass->addRange(from: 0xf900, to: 0xfdcf); | 
| 3050 |             yyCharClass->addRange(from: 0xfdf0, to: 0xfffd); | 
| 3051 |             yyCharClass->addRange(from: (ushort)0x10000, to: (ushort)0xeffff); | 
| 3052 |             yyCharClass->addRange(from: 0x0300, to: 0x036f); | 
| 3053 |             yyCharClass->addRange(from: 0x203f, to: 0x2040); | 
| 3054 |             return Tok_CharClass; | 
| 3055 |         } else { | 
| 3056 |             break; | 
| 3057 |         } | 
| 3058 |     case 'P': | 
| 3059 |         if (!xmlSchemaExtensions) | 
| 3060 |             break; | 
| 3061 |         yyCharClass->setNegative(!yyCharClass->negative()); | 
| 3062 |         Q_FALLTHROUGH(); | 
| 3063 |     case 'p': | 
| 3064 |         if (xmlSchemaExtensions) { | 
| 3065 |             if (yyCh != '{') { | 
| 3066 |                 error(RXERR_CHARCLASS); | 
| 3067 |                 return Tok_CharClass; | 
| 3068 |             } | 
| 3069 |  | 
| 3070 |             QByteArray category; | 
| 3071 |             yyCh = getChar(); | 
| 3072 |             while (yyCh != '}') { | 
| 3073 |                 if (yyCh == EOS) { | 
| 3074 |                     error(RXERR_END); | 
| 3075 |                     return Tok_CharClass; | 
| 3076 |                 } | 
| 3077 |                 category.append(c: yyCh); | 
| 3078 |                 yyCh = getChar(); | 
| 3079 |             } | 
| 3080 |             yyCh = getChar(); // skip closing '}' | 
| 3081 |  | 
| 3082 |             int catlen = category.size(); | 
| 3083 |             if (catlen == 1 || catlen == 2) { | 
| 3084 |                 switch (category.at(i: 0)) { | 
| 3085 |                 case 'M': | 
| 3086 |                     if (catlen == 1) { | 
| 3087 |                         yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | 
| 3088 |                                                    FLAG(QChar::Mark_SpacingCombining) | | 
| 3089 |                                                    FLAG(QChar::Mark_Enclosing)); | 
| 3090 |                     } else { | 
| 3091 |                         switch (category.at(i: 1)) { | 
| 3092 |                         case 'n': yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing)); break; // Mn | 
| 3093 |                         case 'c': yyCharClass->addCategories(FLAG(QChar::Mark_SpacingCombining)); break; // Mc | 
| 3094 |                         case 'e': yyCharClass->addCategories(FLAG(QChar::Mark_Enclosing)); break; // Me | 
| 3095 |                         default: error(RXERR_CATEGORY); break; | 
| 3096 |                         } | 
| 3097 |                     } | 
| 3098 |                     break; | 
| 3099 |                 case 'N': | 
| 3100 |                     if (catlen == 1) { | 
| 3101 |                         yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit) | | 
| 3102 |                                                    FLAG(QChar::Number_Letter) | | 
| 3103 |                                                    FLAG(QChar::Number_Other)); | 
| 3104 |                     } else { | 
| 3105 |                         switch (category.at(i: 1)) { | 
| 3106 |                         case 'd': yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); break; // Nd | 
| 3107 |                         case 'l': yyCharClass->addCategories(FLAG(QChar::Number_Letter)); break; // Hl | 
| 3108 |                         case 'o': yyCharClass->addCategories(FLAG(QChar::Number_Other)); break; // No | 
| 3109 |                         default: error(RXERR_CATEGORY); break; | 
| 3110 |                         } | 
| 3111 |                     } | 
| 3112 |                     break; | 
| 3113 |                 case 'Z': | 
| 3114 |                     if (catlen == 1) { | 
| 3115 |                         yyCharClass->addCategories(FLAG(QChar::Separator_Space) | | 
| 3116 |                                                    FLAG(QChar::Separator_Line) | | 
| 3117 |                                                    FLAG(QChar::Separator_Paragraph)); | 
| 3118 |                     } else { | 
| 3119 |                         switch (category.at(i: 1)) { | 
| 3120 |                         case 's': yyCharClass->addCategories(FLAG(QChar::Separator_Space)); break; // Zs | 
| 3121 |                         case 'l': yyCharClass->addCategories(FLAG(QChar::Separator_Line)); break; // Zl | 
| 3122 |                         case 'p': yyCharClass->addCategories(FLAG(QChar::Separator_Paragraph)); break; // Zp | 
| 3123 |                         default: error(RXERR_CATEGORY); break; | 
| 3124 |                         } | 
| 3125 |                     } | 
| 3126 |                     break; | 
| 3127 |                 case 'C': | 
| 3128 |                     if (catlen == 1) { | 
| 3129 |                         yyCharClass->addCategories(FLAG(QChar::Other_Control) | | 
| 3130 |                                                    FLAG(QChar::Other_Format) | | 
| 3131 |                                                    FLAG(QChar::Other_Surrogate) | | 
| 3132 |                                                    FLAG(QChar::Other_PrivateUse) | | 
| 3133 |                                                    FLAG(QChar::Other_NotAssigned)); | 
| 3134 |                     } else { | 
| 3135 |                         switch (category.at(i: 1)) { | 
| 3136 |                         case 'c': yyCharClass->addCategories(FLAG(QChar::Other_Control)); break; // Cc | 
| 3137 |                         case 'f': yyCharClass->addCategories(FLAG(QChar::Other_Format)); break; // Cf | 
| 3138 |                         case 's': yyCharClass->addCategories(FLAG(QChar::Other_Surrogate)); break; // Cs | 
| 3139 |                         case 'o': yyCharClass->addCategories(FLAG(QChar::Other_PrivateUse)); break; // Co | 
| 3140 |                         case 'n': yyCharClass->addCategories(FLAG(QChar::Other_NotAssigned)); break; // Cn | 
| 3141 |                         default: error(RXERR_CATEGORY); break; | 
| 3142 |                         } | 
| 3143 |                     } | 
| 3144 |                     break; | 
| 3145 |                 case 'L': | 
| 3146 |                     if (catlen == 1) { | 
| 3147 |                         yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase) | | 
| 3148 |                                                    FLAG(QChar::Letter_Lowercase) | | 
| 3149 |                                                    FLAG(QChar::Letter_Titlecase) | | 
| 3150 |                                                    FLAG(QChar::Letter_Modifier) | | 
| 3151 |                                                    FLAG(QChar::Letter_Other)); | 
| 3152 |                     } else { | 
| 3153 |                         switch (category.at(i: 1)) { | 
| 3154 |                         case 'u': yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase)); break; // Lu | 
| 3155 |                         case 'l': yyCharClass->addCategories(FLAG(QChar::Letter_Lowercase)); break; // Ll | 
| 3156 |                         case 't': yyCharClass->addCategories(FLAG(QChar::Letter_Titlecase)); break; // Lt | 
| 3157 |                         case 'm': yyCharClass->addCategories(FLAG(QChar::Letter_Modifier)); break; // Lm | 
| 3158 |                         case 'o': yyCharClass->addCategories(FLAG(QChar::Letter_Other)); break; // Lo | 
| 3159 |                         default: error(RXERR_CATEGORY); break; | 
| 3160 |                         } | 
| 3161 |                     } | 
| 3162 |                     break; | 
| 3163 |                 case 'P': | 
| 3164 |                     if (catlen == 1) { | 
| 3165 |                         yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector) | | 
| 3166 |                                                    FLAG(QChar::Punctuation_Dash) | | 
| 3167 |                                                    FLAG(QChar::Punctuation_Open) | | 
| 3168 |                                                    FLAG(QChar::Punctuation_Close) | | 
| 3169 |                                                    FLAG(QChar::Punctuation_InitialQuote) | | 
| 3170 |                                                    FLAG(QChar::Punctuation_FinalQuote) | | 
| 3171 |                                                    FLAG(QChar::Punctuation_Other)); | 
| 3172 |                     } else { | 
| 3173 |                         switch (category.at(i: 1)) { | 
| 3174 |                         case 'c': yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector)); break; // Pc | 
| 3175 |                         case 'd': yyCharClass->addCategories(FLAG(QChar::Punctuation_Dash)); break; // Pd | 
| 3176 |                         case 's': yyCharClass->addCategories(FLAG(QChar::Punctuation_Open)); break; // Ps | 
| 3177 |                         case 'e': yyCharClass->addCategories(FLAG(QChar::Punctuation_Close)); break; // Pe | 
| 3178 |                         case 'i': yyCharClass->addCategories(FLAG(QChar::Punctuation_InitialQuote)); break; // Pi | 
| 3179 |                         case 'f': yyCharClass->addCategories(FLAG(QChar::Punctuation_FinalQuote)); break; // Pf | 
| 3180 |                         case 'o': yyCharClass->addCategories(FLAG(QChar::Punctuation_Other)); break; // Po | 
| 3181 |                         default: error(RXERR_CATEGORY); break; | 
| 3182 |                         } | 
| 3183 |                     } | 
| 3184 |                     break; | 
| 3185 |                 case 'S': | 
| 3186 |                     if (catlen == 1) { | 
| 3187 |                         yyCharClass->addCategories(FLAG(QChar::Symbol_Math) | | 
| 3188 |                                                    FLAG(QChar::Symbol_Currency) | | 
| 3189 |                                                    FLAG(QChar::Symbol_Modifier) | | 
| 3190 |                                                    FLAG(QChar::Symbol_Other)); | 
| 3191 |                     } else { | 
| 3192 |                         switch (category.at(i: 1)) { | 
| 3193 |                         case 'm': yyCharClass->addCategories(FLAG(QChar::Symbol_Math)); break; // Sm | 
| 3194 |                         case 'c': yyCharClass->addCategories(FLAG(QChar::Symbol_Currency)); break; // Sc | 
| 3195 |                         case 'k': yyCharClass->addCategories(FLAG(QChar::Symbol_Modifier)); break; // Sk | 
| 3196 |                         case 'o': yyCharClass->addCategories(FLAG(QChar::Symbol_Other)); break; // So | 
| 3197 |                         default: error(RXERR_CATEGORY); break; | 
| 3198 |                         } | 
| 3199 |                     } | 
| 3200 |                     break; | 
| 3201 |                 default: | 
| 3202 |                     error(RXERR_CATEGORY); | 
| 3203 |                     break; | 
| 3204 |                 } | 
| 3205 |             } else if (catlen > 2 && category.at(i: 0) == 'I' && category.at(i: 1) == 's') { | 
| 3206 |                 static const int N = sizeof(categoriesRangeMap) / sizeof(categoriesRangeMap[0]); | 
| 3207 |                 const char * const categoryFamily = category.constData() + 2; | 
| 3208 |                 const CategoriesRangeMapEntry *r = std::lower_bound(first: categoriesRangeMap, last: categoriesRangeMap + N, val: categoryFamily); | 
| 3209 |                 if (r != categoriesRangeMap + N && qstrcmp(str1: r->name, str2: categoryFamily) == 0) | 
| 3210 |                     yyCharClass->addRange(from: r->first, to: r->second); | 
| 3211 |                 else | 
| 3212 |                     error(RXERR_CATEGORY); | 
| 3213 |             } else { | 
| 3214 |                 error(RXERR_CATEGORY); | 
| 3215 |             } | 
| 3216 |             return Tok_CharClass; | 
| 3217 |         } else { | 
| 3218 |             break; | 
| 3219 |         } | 
| 3220 | #endif | 
| 3221 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 3222 |     case 'x': | 
| 3223 |         val = 0; | 
| 3224 |         for (i = 0; i < 4; i++) { | 
| 3225 |             low = QChar(yyCh).toLower().unicode(); | 
| 3226 |             if (low >= '0' && low <= '9') | 
| 3227 |                 val = (val << 4) | (low - '0'); | 
| 3228 |             else if (low >= 'a' && low <= 'f') | 
| 3229 |                 val = (val << 4) | (low - 'a' + 10); | 
| 3230 |             else | 
| 3231 |                 break; | 
| 3232 |             yyCh = getChar(); | 
| 3233 |         } | 
| 3234 |         return Tok_Char | val; | 
| 3235 | #endif | 
| 3236 |     default: | 
| 3237 |         break; | 
| 3238 |     } | 
| 3239 |     if (prevCh >= '1' && prevCh <= '9') { | 
| 3240 | #ifndef QT_NO_REGEXP_BACKREF | 
| 3241 |         val = prevCh - '0'; | 
| 3242 |         while (yyCh >= '0' && yyCh <= '9') { | 
| 3243 |             val = (val * 10) + (yyCh - '0'); | 
| 3244 |             yyCh = getChar(); | 
| 3245 |         } | 
| 3246 |         return Tok_BackRef | val; | 
| 3247 | #else | 
| 3248 |         error(RXERR_DISABLED); | 
| 3249 | #endif | 
| 3250 |     } | 
| 3251 |     return Tok_Char | prevCh; | 
| 3252 | } | 
| 3253 |  | 
| 3254 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3255 | int QRegExpEngine::getRep(int def) | 
| 3256 | { | 
| 3257 |     if (yyCh >= '0' && yyCh <= '9') { | 
| 3258 |         int rep = 0; | 
| 3259 |         do { | 
| 3260 |             rep = 10 * rep + yyCh - '0'; | 
| 3261 |             if (rep >= InftyRep) { | 
| 3262 |                 error(RXERR_REPETITION); | 
| 3263 |                 rep = def; | 
| 3264 |             } | 
| 3265 |             yyCh = getChar(); | 
| 3266 |         } while (yyCh >= '0' && yyCh <= '9'); | 
| 3267 |         return rep; | 
| 3268 |     } else { | 
| 3269 |         return def; | 
| 3270 |     } | 
| 3271 | } | 
| 3272 | #endif | 
| 3273 |  | 
| 3274 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 3275 | void QRegExpEngine::skipChars(int n) | 
| 3276 | { | 
| 3277 |     if (n > 0) { | 
| 3278 |         yyPos += n - 1; | 
| 3279 |         yyCh = getChar(); | 
| 3280 |     } | 
| 3281 | } | 
| 3282 | #endif | 
| 3283 |  | 
| 3284 | void QRegExpEngine::error(const char *msg) | 
| 3285 | { | 
| 3286 |     if (yyError.isEmpty()) | 
| 3287 |         yyError = QLatin1String(msg); | 
| 3288 | } | 
| 3289 |  | 
| 3290 | void QRegExpEngine::startTokenizer(const QChar *rx, int len) | 
| 3291 | { | 
| 3292 |     yyIn = rx; | 
| 3293 |     yyPos0 = 0; | 
| 3294 |     yyPos = 0; | 
| 3295 |     yyLen = len; | 
| 3296 |     yyCh = getChar(); | 
| 3297 |     yyCharClass.emplace(); | 
| 3298 |     yyMinRep = 0; | 
| 3299 |     yyMaxRep = 0; | 
| 3300 |     yyError = QString(); | 
| 3301 | } | 
| 3302 |  | 
| 3303 | int QRegExpEngine::getToken() | 
| 3304 | { | 
| 3305 | #ifndef QT_NO_REGEXP_CCLASS | 
| 3306 |     ushort pendingCh = 0; | 
| 3307 |     bool charPending; | 
| 3308 |     bool rangePending; | 
| 3309 |     int tok; | 
| 3310 | #endif | 
| 3311 |     int prevCh = yyCh; | 
| 3312 |  | 
| 3313 |     yyPos0 = yyPos - 1; | 
| 3314 | #ifndef QT_NO_REGEXP_CCLASS | 
| 3315 |     yyCharClass->clear(); | 
| 3316 | #endif | 
| 3317 |     yyMinRep = 0; | 
| 3318 |     yyMaxRep = 0; | 
| 3319 |     yyCh = getChar(); | 
| 3320 |  | 
| 3321 |     switch (prevCh) { | 
| 3322 |     case EOS: | 
| 3323 |         yyPos0 = yyPos; | 
| 3324 |         return Tok_Eos; | 
| 3325 |     case '$': | 
| 3326 |         return Tok_Dollar; | 
| 3327 |     case '(': | 
| 3328 |         if (yyCh == '?') { | 
| 3329 |             prevCh = getChar(); | 
| 3330 |             yyCh = getChar(); | 
| 3331 |             switch (prevCh) { | 
| 3332 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 3333 |             case '!': | 
| 3334 |                 return Tok_NegLookahead; | 
| 3335 |             case '=': | 
| 3336 |                 return Tok_PosLookahead; | 
| 3337 | #endif | 
| 3338 |             case ':': | 
| 3339 |                 return Tok_MagicLeftParen; | 
| 3340 |             case '<': | 
| 3341 |                 error(RXERR_LOOKBEHIND); | 
| 3342 |                 return Tok_MagicLeftParen; | 
| 3343 |             default: | 
| 3344 |                 error(RXERR_LOOKAHEAD); | 
| 3345 |                 return Tok_MagicLeftParen; | 
| 3346 |             } | 
| 3347 |         } else { | 
| 3348 |             return Tok_LeftParen; | 
| 3349 |         } | 
| 3350 |     case ')': | 
| 3351 |         return Tok_RightParen; | 
| 3352 |     case '*': | 
| 3353 |         yyMinRep = 0; | 
| 3354 |         yyMaxRep = InftyRep; | 
| 3355 |         return Tok_Quantifier; | 
| 3356 |     case '+': | 
| 3357 |         yyMinRep = 1; | 
| 3358 |         yyMaxRep = InftyRep; | 
| 3359 |         return Tok_Quantifier; | 
| 3360 |     case '.': | 
| 3361 | #ifndef QT_NO_REGEXP_CCLASS | 
| 3362 |         yyCharClass->setNegative(true); | 
| 3363 | #endif | 
| 3364 |         return Tok_CharClass; | 
| 3365 |     case '?': | 
| 3366 |         yyMinRep = 0; | 
| 3367 |         yyMaxRep = 1; | 
| 3368 |         return Tok_Quantifier; | 
| 3369 |     case '[': | 
| 3370 | #ifndef QT_NO_REGEXP_CCLASS | 
| 3371 |         if (yyCh == '^') { | 
| 3372 |             yyCharClass->setNegative(true); | 
| 3373 |             yyCh = getChar(); | 
| 3374 |         } | 
| 3375 |         charPending = false; | 
| 3376 |         rangePending = false; | 
| 3377 |         do { | 
| 3378 |             if (yyCh == '-' && charPending && !rangePending) { | 
| 3379 |                 rangePending = true; | 
| 3380 |                 yyCh = getChar(); | 
| 3381 |             } else { | 
| 3382 |                 if (charPending && !rangePending) { | 
| 3383 |                     yyCharClass->addSingleton(ch: pendingCh); | 
| 3384 |                     charPending = false; | 
| 3385 |                 } | 
| 3386 |                 if (yyCh == '\\') { | 
| 3387 |                     yyCh = getChar(); | 
| 3388 |                     tok = getEscape(); | 
| 3389 |                     if (tok == Tok_Word) | 
| 3390 |                         tok = '\b'; | 
| 3391 |                 } else { | 
| 3392 |                     tok = Tok_Char | yyCh; | 
| 3393 |                     yyCh = getChar(); | 
| 3394 |                 } | 
| 3395 |                 if (tok == Tok_CharClass) { | 
| 3396 |                     if (rangePending) { | 
| 3397 |                         yyCharClass->addSingleton(ch: '-'); | 
| 3398 |                         yyCharClass->addSingleton(ch: pendingCh); | 
| 3399 |                         charPending = false; | 
| 3400 |                         rangePending = false; | 
| 3401 |                     } | 
| 3402 |                 } else if ((tok & Tok_Char) != 0) { | 
| 3403 |                     if (rangePending) { | 
| 3404 |                         yyCharClass->addRange(from: pendingCh, to: tok ^ Tok_Char); | 
| 3405 |                         charPending = false; | 
| 3406 |                         rangePending = false; | 
| 3407 |                     } else { | 
| 3408 |                         pendingCh = tok ^ Tok_Char; | 
| 3409 |                         charPending = true; | 
| 3410 |                     } | 
| 3411 |                 } else { | 
| 3412 |                     error(RXERR_CHARCLASS); | 
| 3413 |                 } | 
| 3414 |             } | 
| 3415 |         }  while (yyCh != ']' && yyCh != EOS); | 
| 3416 |         if (rangePending) | 
| 3417 |             yyCharClass->addSingleton(ch: '-'); | 
| 3418 |         if (charPending) | 
| 3419 |             yyCharClass->addSingleton(ch: pendingCh); | 
| 3420 |         if (yyCh == EOS) | 
| 3421 |             error(RXERR_END); | 
| 3422 |         else | 
| 3423 |             yyCh = getChar(); | 
| 3424 |         return Tok_CharClass; | 
| 3425 | #else | 
| 3426 |         error(RXERR_END); | 
| 3427 |         return Tok_Char | '['; | 
| 3428 | #endif | 
| 3429 |     case '\\': | 
| 3430 |         return getEscape(); | 
| 3431 |     case ']': | 
| 3432 |         error(RXERR_LEFTDELIM); | 
| 3433 |         return Tok_Char | ']'; | 
| 3434 |     case '^': | 
| 3435 |         return Tok_Caret; | 
| 3436 |     case '{': | 
| 3437 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3438 |         yyMinRep = getRep(def: 0); | 
| 3439 |         yyMaxRep = yyMinRep; | 
| 3440 |         if (yyCh == ',') { | 
| 3441 |             yyCh = getChar(); | 
| 3442 |             yyMaxRep = getRep(def: InftyRep); | 
| 3443 |         } | 
| 3444 |         if (yyMaxRep < yyMinRep) | 
| 3445 |             error(RXERR_INTERVAL); | 
| 3446 |         if (yyCh != '}') | 
| 3447 |             error(RXERR_REPETITION); | 
| 3448 |         yyCh = getChar(); | 
| 3449 |         return Tok_Quantifier; | 
| 3450 | #else | 
| 3451 |         error(RXERR_DISABLED); | 
| 3452 |         return Tok_Char | '{'; | 
| 3453 | #endif | 
| 3454 |     case '|': | 
| 3455 |         return Tok_Bar; | 
| 3456 |     case '}': | 
| 3457 |         error(RXERR_LEFTDELIM); | 
| 3458 |         return Tok_Char | '}'; | 
| 3459 |     default: | 
| 3460 |         return Tok_Char | prevCh; | 
| 3461 |     } | 
| 3462 | } | 
| 3463 |  | 
| 3464 | int QRegExpEngine::parse(const QChar *pattern, int len) | 
| 3465 | { | 
| 3466 |     valid = true; | 
| 3467 |     startTokenizer(rx: pattern, len); | 
| 3468 |     yyTok = getToken(); | 
| 3469 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3470 |     yyMayCapture = true; | 
| 3471 | #else | 
| 3472 |     yyMayCapture = false; | 
| 3473 | #endif | 
| 3474 |  | 
| 3475 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3476 |     int atom = startAtom(officialCapture: false); | 
| 3477 | #endif | 
| 3478 |     QRegExpCharClass anything; | 
| 3479 |     Box box(this); // create InitialState | 
| 3480 |     box.set(anything); | 
| 3481 |     Box rightBox(this); // create FinalState | 
| 3482 |     rightBox.set(anything); | 
| 3483 |  | 
| 3484 |     Box middleBox(this); | 
| 3485 |     parseExpression(box: &middleBox); | 
| 3486 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3487 |     finishAtom(atom, needCapture: false); | 
| 3488 | #endif | 
| 3489 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3490 |     middleBox.setupHeuristics(); | 
| 3491 | #endif | 
| 3492 |     box.cat(b: middleBox); | 
| 3493 |     box.cat(b: rightBox); | 
| 3494 |     yyCharClass.reset(); | 
| 3495 |  | 
| 3496 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3497 |     for (int i = 0; i < nf; ++i) { | 
| 3498 |         switch (f[i].capture) { | 
| 3499 |         case QRegExpAtom::NoCapture: | 
| 3500 |             break; | 
| 3501 |         case QRegExpAtom::OfficialCapture: | 
| 3502 |             f[i].capture = ncap; | 
| 3503 |             captureForOfficialCapture.append(t: ncap); | 
| 3504 |             ++ncap; | 
| 3505 |             ++officialncap; | 
| 3506 |             break; | 
| 3507 |         case QRegExpAtom::UnofficialCapture: | 
| 3508 |             f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture; | 
| 3509 |         } | 
| 3510 |     } | 
| 3511 |  | 
| 3512 | #ifndef QT_NO_REGEXP_BACKREF | 
| 3513 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3514 |     if (officialncap == 0 && nbrefs == 0) { | 
| 3515 |         ncap = nf = 0; | 
| 3516 |         f.clear(); | 
| 3517 |     } | 
| 3518 | #endif | 
| 3519 |     // handle the case where there's a \5 with no corresponding capture | 
| 3520 |     // (captureForOfficialCapture.size() != officialncap) | 
| 3521 |     for (int i = 0; i < nbrefs - officialncap; ++i) { | 
| 3522 |         captureForOfficialCapture.append(t: ncap); | 
| 3523 |         ++ncap; | 
| 3524 |     } | 
| 3525 | #endif | 
| 3526 | #endif | 
| 3527 |  | 
| 3528 |     if (!yyError.isEmpty()) | 
| 3529 |         return -1; | 
| 3530 |  | 
| 3531 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3532 |     const QRegExpAutomatonState &sinit = s.at(i: InitialState); | 
| 3533 |     caretAnchored = !sinit.anchors.isEmpty(); | 
| 3534 |     if (caretAnchored) { | 
| 3535 |         const QMap<int, int> &anchors = sinit.anchors; | 
| 3536 |         QMap<int, int>::const_iterator a; | 
| 3537 |         for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) { | 
| 3538 |             if ( | 
| 3539 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 
| 3540 |                 (*a & Anchor_Alternation) != 0 || | 
| 3541 | #endif | 
| 3542 |                 (*a & Anchor_Caret) == 0) | 
| 3543 |             { | 
| 3544 |                 caretAnchored = false; | 
| 3545 |                 break; | 
| 3546 |             } | 
| 3547 |         } | 
| 3548 |     } | 
| 3549 | #endif | 
| 3550 |  | 
| 3551 |     // cleanup anchors | 
| 3552 |     int numStates = s.size(); | 
| 3553 |     for (int i = 0; i < numStates; ++i) { | 
| 3554 |         QRegExpAutomatonState &state = s[i]; | 
| 3555 |         if (!state.anchors.isEmpty()) { | 
| 3556 |             QMap<int, int>::iterator a = state.anchors.begin(); | 
| 3557 |             while (a != state.anchors.end()) { | 
| 3558 |                 if (a.value() == 0) | 
| 3559 |                     a = state.anchors.erase(it: a); | 
| 3560 |                 else | 
| 3561 |                     ++a; | 
| 3562 |             } | 
| 3563 |         } | 
| 3564 |     } | 
| 3565 |  | 
| 3566 |     return yyPos0; | 
| 3567 | } | 
| 3568 |  | 
| 3569 | void QRegExpEngine::parseAtom(Box *box) | 
| 3570 | { | 
| 3571 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 3572 |     QRegExpEngine *eng = nullptr; | 
| 3573 |     bool neg; | 
| 3574 |     int len; | 
| 3575 | #endif | 
| 3576 |  | 
| 3577 |     if ((yyTok & Tok_Char) != 0) { | 
| 3578 |         box->set(QChar(yyTok ^ Tok_Char)); | 
| 3579 |     } else { | 
| 3580 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3581 |         trivial = false; | 
| 3582 | #endif | 
| 3583 |         switch (yyTok) { | 
| 3584 |         case Tok_Dollar: | 
| 3585 |             box->catAnchor(a: Anchor_Dollar); | 
| 3586 |             break; | 
| 3587 |         case Tok_Caret: | 
| 3588 |             box->catAnchor(a: Anchor_Caret); | 
| 3589 |             break; | 
| 3590 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 
| 3591 |         case Tok_PosLookahead: | 
| 3592 |         case Tok_NegLookahead: | 
| 3593 |             neg = (yyTok == Tok_NegLookahead); | 
| 3594 |             eng = new QRegExpEngine(cs, greedyQuantifiers); | 
| 3595 |             len = eng->parse(pattern: yyIn + yyPos - 1, len: yyLen - yyPos + 1); | 
| 3596 |             if (len >= 0) | 
| 3597 |                 skipChars(n: len); | 
| 3598 |             else | 
| 3599 |                 error(RXERR_LOOKAHEAD); | 
| 3600 |             box->catAnchor(a: addLookahead(eng, negative: neg)); | 
| 3601 |             yyTok = getToken(); | 
| 3602 |             if (yyTok != Tok_RightParen) | 
| 3603 |                 error(RXERR_LOOKAHEAD); | 
| 3604 |             break; | 
| 3605 | #endif | 
| 3606 | #ifndef QT_NO_REGEXP_ESCAPE | 
| 3607 |         case Tok_Word: | 
| 3608 |             box->catAnchor(a: Anchor_Word); | 
| 3609 |             break; | 
| 3610 |         case Tok_NonWord: | 
| 3611 |             box->catAnchor(a: Anchor_NonWord); | 
| 3612 |             break; | 
| 3613 | #endif | 
| 3614 |         case Tok_LeftParen: | 
| 3615 |         case Tok_MagicLeftParen: | 
| 3616 |             yyTok = getToken(); | 
| 3617 |             parseExpression(box); | 
| 3618 |             if (yyTok != Tok_RightParen) | 
| 3619 |                 error(RXERR_END); | 
| 3620 |             break; | 
| 3621 |         case Tok_CharClass: | 
| 3622 |             box->set(*yyCharClass); | 
| 3623 |             break; | 
| 3624 |         case Tok_Quantifier: | 
| 3625 |             error(RXERR_REPETITION); | 
| 3626 |             break; | 
| 3627 |         default: | 
| 3628 | #ifndef QT_NO_REGEXP_BACKREF | 
| 3629 |             if ((yyTok & Tok_BackRef) != 0) | 
| 3630 |                 box->set(yyTok ^ Tok_BackRef); | 
| 3631 |             else | 
| 3632 | #endif | 
| 3633 |                 error(RXERR_DISABLED); | 
| 3634 |         } | 
| 3635 |     } | 
| 3636 |     yyTok = getToken(); | 
| 3637 | } | 
| 3638 |  | 
| 3639 | void QRegExpEngine::parseFactor(Box *box) | 
| 3640 | { | 
| 3641 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3642 |     int outerAtom = greedyQuantifiers ? startAtom(officialCapture: false) : -1; | 
| 3643 |     int innerAtom = startAtom(officialCapture: yyMayCapture && yyTok == Tok_LeftParen); | 
| 3644 |     bool magicLeftParen = (yyTok == Tok_MagicLeftParen); | 
| 3645 | #else | 
| 3646 |     const int innerAtom = -1; | 
| 3647 | #endif | 
| 3648 |  | 
| 3649 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3650 | #define YYREDO() \ | 
| 3651 |         yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \ | 
| 3652 |         *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok | 
| 3653 |  | 
| 3654 |     const QChar *in = yyIn; | 
| 3655 |     int pos0 = yyPos0; | 
| 3656 |     int pos = yyPos; | 
| 3657 |     int len = yyLen; | 
| 3658 |     int ch = yyCh; | 
| 3659 |     QRegExpCharClass charClass; | 
| 3660 |     if (yyTok == Tok_CharClass) | 
| 3661 |         charClass = *yyCharClass; | 
| 3662 |     int tok = yyTok; | 
| 3663 |     bool mayCapture = yyMayCapture; | 
| 3664 | #endif | 
| 3665 |  | 
| 3666 |     parseAtom(box); | 
| 3667 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3668 |     finishAtom(atom: innerAtom, needCapture: magicLeftParen); | 
| 3669 | #endif | 
| 3670 |  | 
| 3671 |     bool hasQuantifier = (yyTok == Tok_Quantifier); | 
| 3672 |     if (hasQuantifier) { | 
| 3673 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3674 |         trivial = false; | 
| 3675 | #endif | 
| 3676 |         if (yyMaxRep == InftyRep) { | 
| 3677 |             box->plus(atom: innerAtom); | 
| 3678 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3679 |         } else if (yyMaxRep == 0) { | 
| 3680 |             box->clear(); | 
| 3681 | #endif | 
| 3682 |         } | 
| 3683 |         if (yyMinRep == 0) | 
| 3684 |             box->opt(); | 
| 3685 |  | 
| 3686 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3687 |         yyMayCapture = false; | 
| 3688 |         int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1; | 
| 3689 |         int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1); | 
| 3690 |  | 
| 3691 |         Box rightBox(this); | 
| 3692 |         int i; | 
| 3693 |  | 
| 3694 |         for (i = 0; i < beta; i++) { | 
| 3695 |             YYREDO(); | 
| 3696 |             Box leftBox(this); | 
| 3697 |             parseAtom(box: &leftBox); | 
| 3698 |             leftBox.cat(b: rightBox); | 
| 3699 |             leftBox.opt(); | 
| 3700 |             rightBox = leftBox; | 
| 3701 |         } | 
| 3702 |         for (i = 0; i < alpha; i++) { | 
| 3703 |             YYREDO(); | 
| 3704 |             Box leftBox(this); | 
| 3705 |             parseAtom(box: &leftBox); | 
| 3706 |             leftBox.cat(b: rightBox); | 
| 3707 |             rightBox = leftBox; | 
| 3708 |         } | 
| 3709 |         rightBox.cat(b: *box); | 
| 3710 |         *box = rightBox; | 
| 3711 | #endif | 
| 3712 |         yyTok = getToken(); | 
| 3713 | #ifndef QT_NO_REGEXP_INTERVAL | 
| 3714 |         yyMayCapture = mayCapture; | 
| 3715 | #endif | 
| 3716 |     } | 
| 3717 | #undef YYREDO | 
| 3718 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3719 |     if (greedyQuantifiers) | 
| 3720 |         finishAtom(atom: outerAtom, needCapture: hasQuantifier); | 
| 3721 | #endif | 
| 3722 | } | 
| 3723 |  | 
| 3724 | void QRegExpEngine::parseTerm(Box *box) | 
| 3725 | { | 
| 3726 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3727 |     if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) | 
| 3728 |         parseFactor(box); | 
| 3729 | #endif | 
| 3730 |     while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) { | 
| 3731 |         Box rightBox(this); | 
| 3732 |         parseFactor(box: &rightBox); | 
| 3733 |         box->cat(b: rightBox); | 
| 3734 |     } | 
| 3735 | } | 
| 3736 |  | 
| 3737 | void QRegExpEngine::parseExpression(Box *box) | 
| 3738 | { | 
| 3739 |     parseTerm(box); | 
| 3740 |     while (yyTok == Tok_Bar) { | 
| 3741 | #ifndef QT_NO_REGEXP_OPTIM | 
| 3742 |         trivial = false; | 
| 3743 | #endif | 
| 3744 |         Box rightBox(this); | 
| 3745 |         yyTok = getToken(); | 
| 3746 |         parseTerm(box: &rightBox); | 
| 3747 |         box->orx(b: rightBox); | 
| 3748 |     } | 
| 3749 | } | 
| 3750 |  | 
| 3751 | /* | 
| 3752 |   The struct QRegExpPrivate contains the private data of a regular | 
| 3753 |   expression other than the automaton. It makes it possible for many | 
| 3754 |   QRegExp objects to use the same QRegExpEngine object with different | 
| 3755 |   QRegExpPrivate objects. | 
| 3756 | */ | 
| 3757 | struct QRegExpPrivate | 
| 3758 | { | 
| 3759 |     QRegExpEngine *eng; | 
| 3760 |     QRegExpEngineKey engineKey; | 
| 3761 |     bool minimal; | 
| 3762 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3763 |     QString t; // last string passed to QRegExp::indexIn() or lastIndexIn() | 
| 3764 |     QStringList capturedCache; // what QRegExp::capturedTexts() returned last | 
| 3765 | #endif | 
| 3766 |     QRegExpMatchState matchState; | 
| 3767 |  | 
| 3768 |     inline QRegExpPrivate() | 
| 3769 |         : eng(nullptr), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { } | 
| 3770 |     inline QRegExpPrivate(const QRegExpEngineKey &key) | 
| 3771 |         : eng(nullptr), engineKey(key), minimal(false) {} | 
| 3772 | }; | 
| 3773 |  | 
| 3774 | #if !defined(QT_NO_REGEXP_OPTIM) | 
| 3775 | struct QRECache | 
| 3776 | { | 
| 3777 |     typedef QHash<QRegExpEngineKey, QRegExpEngine *> EngineCache; | 
| 3778 |     typedef QCache<QRegExpEngineKey, QRegExpEngine> UnusedEngineCache; | 
| 3779 |     EngineCache usedEngines; | 
| 3780 |     UnusedEngineCache unusedEngines; | 
| 3781 | }; | 
| 3782 | Q_GLOBAL_STATIC(QRECache, engineCache) | 
| 3783 | static QBasicMutex engineCacheMutex; | 
| 3784 | #endif // QT_NO_REGEXP_OPTIM | 
| 3785 |  | 
| 3786 | static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key) | 
| 3787 | { | 
| 3788 | #if !defined(QT_NO_REGEXP_OPTIM) | 
| 3789 |     const auto locker = qt_scoped_lock(mutex&: engineCacheMutex); | 
| 3790 |     if (!eng->ref.deref()) { | 
| 3791 |         if (QRECache *c = engineCache()) { | 
| 3792 |             c->unusedEngines.insert(key, object: eng, cost: 4 + key.pattern.size() / 4); | 
| 3793 |             c->usedEngines.remove(key); | 
| 3794 |         } else { | 
| 3795 |             delete eng; | 
| 3796 |         } | 
| 3797 |     } | 
| 3798 | #else | 
| 3799 |     Q_UNUSED(key); | 
| 3800 |     if (!eng->ref.deref()) | 
| 3801 |         delete eng; | 
| 3802 | #endif | 
| 3803 | } | 
| 3804 |  | 
| 3805 | static void prepareEngine_helper(QRegExpPrivate *priv) | 
| 3806 | { | 
| 3807 |     Q_ASSERT(!priv->eng); | 
| 3808 |  | 
| 3809 | #if !defined(QT_NO_REGEXP_OPTIM) | 
| 3810 |     const auto locker = qt_scoped_lock(mutex&: engineCacheMutex); | 
| 3811 |     if (QRECache *c = engineCache()) { | 
| 3812 |         priv->eng = c->unusedEngines.take(key: priv->engineKey); | 
| 3813 |         if (!priv->eng) | 
| 3814 |             priv->eng = c->usedEngines.value(key: priv->engineKey); | 
| 3815 |         if (!priv->eng) | 
| 3816 |             priv->eng = new QRegExpEngine(priv->engineKey); | 
| 3817 |         else | 
| 3818 |             priv->eng->ref.ref(); | 
| 3819 |  | 
| 3820 |         c->usedEngines.insert(key: priv->engineKey, value: priv->eng); | 
| 3821 |         return; | 
| 3822 |     } | 
| 3823 | #endif // QT_NO_REGEXP_OPTIM | 
| 3824 |  | 
| 3825 |     priv->eng = new QRegExpEngine(priv->engineKey); | 
| 3826 | } | 
| 3827 |  | 
| 3828 | inline static void prepareEngine(QRegExpPrivate *priv) | 
| 3829 | { | 
| 3830 |     if (priv->eng) | 
| 3831 |         return; | 
| 3832 |     prepareEngine_helper(priv); | 
| 3833 |     priv->matchState.prepareForMatch(eng: priv->eng); | 
| 3834 | } | 
| 3835 |  | 
| 3836 | static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str) | 
| 3837 | { | 
| 3838 |     prepareEngine(priv); | 
| 3839 |     priv->matchState.prepareForMatch(eng: priv->eng); | 
| 3840 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3841 |     priv->t = str; | 
| 3842 |     priv->capturedCache.clear(); | 
| 3843 | #else | 
| 3844 |     Q_UNUSED(str); | 
| 3845 | #endif | 
| 3846 | } | 
| 3847 |  | 
| 3848 | static void invalidateEngine(QRegExpPrivate *priv) | 
| 3849 | { | 
| 3850 |     if (priv->eng) { | 
| 3851 |         derefEngine(eng: priv->eng, key: priv->engineKey); | 
| 3852 |         priv->eng = nullptr; | 
| 3853 |         priv->matchState.drain(); | 
| 3854 |     } | 
| 3855 | } | 
| 3856 |  | 
| 3857 | /*! | 
| 3858 |     \enum QRegExp::CaretMode | 
| 3859 |  | 
| 3860 |     The CaretMode enum defines the different meanings of the caret | 
| 3861 |     (\b{^}) in a regular expression. The possible values are: | 
| 3862 |  | 
| 3863 |     \value CaretAtZero | 
| 3864 |            The caret corresponds to index 0 in the searched string. | 
| 3865 |  | 
| 3866 |     \value CaretAtOffset | 
| 3867 |            The caret corresponds to the start offset of the search. | 
| 3868 |  | 
| 3869 |     \value CaretWontMatch | 
| 3870 |            The caret never matches. | 
| 3871 | */ | 
| 3872 |  | 
| 3873 | /*! | 
| 3874 |     \enum QRegExp::PatternSyntax | 
| 3875 |  | 
| 3876 |     The syntax used to interpret the meaning of the pattern. | 
| 3877 |  | 
| 3878 |     \value RegExp A rich Perl-like pattern matching syntax. This is | 
| 3879 |     the default. | 
| 3880 |  | 
| 3881 |     \value RegExp2 Like RegExp, but with \l{greedy quantifiers}. | 
| 3882 |     (Introduced in Qt 4.2.) | 
| 3883 |  | 
| 3884 |     \value Wildcard This provides a simple pattern matching syntax | 
| 3885 |     similar to that used by shells (command interpreters) for "file | 
| 3886 |     globbing". See \l{QRegExp wildcard matching}. | 
| 3887 |  | 
| 3888 |     \value WildcardUnix This is similar to Wildcard but with the | 
| 3889 |     behavior of a Unix shell. The wildcard characters can be escaped | 
| 3890 |     with the character "\\". | 
| 3891 |  | 
| 3892 |     \value FixedString The pattern is a fixed string. This is | 
| 3893 |     equivalent to using the RegExp pattern on a string in | 
| 3894 |     which all metacharacters are escaped using escape(). | 
| 3895 |  | 
| 3896 |     \value W3CXmlSchema11 The pattern is a regular expression as | 
| 3897 |     defined by the W3C XML Schema 1.1 specification. | 
| 3898 |  | 
| 3899 |     \sa setPatternSyntax() | 
| 3900 | */ | 
| 3901 |  | 
| 3902 | /*! | 
| 3903 |     Constructs an empty regexp. | 
| 3904 |  | 
| 3905 |     \sa isValid(), errorString() | 
| 3906 | */ | 
| 3907 | QRegExp::QRegExp() | 
| 3908 | { | 
| 3909 |     priv = new QRegExpPrivate; | 
| 3910 |     prepareEngine(priv); | 
| 3911 | } | 
| 3912 |  | 
| 3913 | /*! | 
| 3914 |     Constructs a regular expression object for the given \a pattern | 
| 3915 |     string. The pattern must be given using wildcard notation if \a | 
| 3916 |     syntax is \l Wildcard; the default is \l RegExp. The pattern is | 
| 3917 |     case sensitive, unless \a cs is Qt::CaseInsensitive. Matching is | 
| 3918 |     greedy (maximal), but can be changed by calling | 
| 3919 |     setMinimal(). | 
| 3920 |  | 
| 3921 |     \sa setPattern(), setCaseSensitivity(), setPatternSyntax() | 
| 3922 | */ | 
| 3923 | QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax) | 
| 3924 | { | 
| 3925 |     priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs)); | 
| 3926 |     prepareEngine(priv); | 
| 3927 | } | 
| 3928 |  | 
| 3929 | /*! | 
| 3930 |     Constructs a regular expression as a copy of \a rx. | 
| 3931 |  | 
| 3932 |     \sa operator=() | 
| 3933 | */ | 
| 3934 | QRegExp::QRegExp(const QRegExp &rx) | 
| 3935 | { | 
| 3936 |     priv = new QRegExpPrivate; | 
| 3937 |     operator=(rx); | 
| 3938 | } | 
| 3939 |  | 
| 3940 | /*! | 
| 3941 |     Destroys the regular expression and cleans up its internal data. | 
| 3942 | */ | 
| 3943 | QRegExp::~QRegExp() | 
| 3944 | { | 
| 3945 |     invalidateEngine(priv); | 
| 3946 |     delete priv; | 
| 3947 | } | 
| 3948 |  | 
| 3949 | /*! | 
| 3950 |     Copies the regular expression \a rx and returns a reference to the | 
| 3951 |     copy. The case sensitivity, wildcard, and minimal matching options | 
| 3952 |     are also copied. | 
| 3953 | */ | 
| 3954 | QRegExp &QRegExp::operator=(const QRegExp &rx) | 
| 3955 | { | 
| 3956 |     prepareEngine(priv: rx.priv); // to allow sharing | 
| 3957 |     QRegExpEngine *otherEng = rx.priv->eng; | 
| 3958 |     if (otherEng) | 
| 3959 |         otherEng->ref.ref(); | 
| 3960 |     invalidateEngine(priv); | 
| 3961 |     priv->eng = otherEng; | 
| 3962 |     priv->engineKey = rx.priv->engineKey; | 
| 3963 |     priv->minimal = rx.priv->minimal; | 
| 3964 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 3965 |     priv->t = rx.priv->t; | 
| 3966 |     priv->capturedCache = rx.priv->capturedCache; | 
| 3967 | #endif | 
| 3968 |     if (priv->eng) | 
| 3969 |         priv->matchState.prepareForMatch(eng: priv->eng); | 
| 3970 |     priv->matchState.captured = rx.priv->matchState.captured; | 
| 3971 |     return *this; | 
| 3972 | } | 
| 3973 |  | 
| 3974 | /*! | 
| 3975 |     \fn QRegExp &QRegExp::operator=(QRegExp &&other) | 
| 3976 |  | 
| 3977 |     Move-assigns \a other to this QRegExp instance. | 
| 3978 |  | 
| 3979 |     \since 5.2 | 
| 3980 | */ | 
| 3981 |  | 
| 3982 | /*! | 
| 3983 |     \fn void QRegExp::swap(QRegExp &other) | 
| 3984 |     \since 4.8 | 
| 3985 |  | 
| 3986 |     Swaps regular expression \a other with this regular | 
| 3987 |     expression. This operation is very fast and never fails. | 
| 3988 | */ | 
| 3989 |  | 
| 3990 | /*! | 
| 3991 |     Returns \c true if this regular expression is equal to \a rx; | 
| 3992 |     otherwise returns \c false. | 
| 3993 |  | 
| 3994 |     Two QRegExp objects are equal if they have the same pattern | 
| 3995 |     strings and the same settings for case sensitivity, wildcard and | 
| 3996 |     minimal matching. | 
| 3997 | */ | 
| 3998 | bool QRegExp::operator==(const QRegExp &rx) const | 
| 3999 | { | 
| 4000 |     return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal; | 
| 4001 | } | 
| 4002 |  | 
| 4003 | /*! | 
| 4004 |     \since 5.6 | 
| 4005 |     \relates QRegExp | 
| 4006 |  | 
| 4007 |     Returns the hash value for \a key, using | 
| 4008 |     \a seed to seed the calculation. | 
| 4009 | */ | 
| 4010 | size_t qHash(const QRegExp &key, size_t seed) noexcept | 
| 4011 | { | 
| 4012 |     QtPrivate::QHashCombine hash; | 
| 4013 |     seed = hash(seed, key.priv->engineKey); | 
| 4014 |     seed = hash(seed, key.priv->minimal); | 
| 4015 |     return seed; | 
| 4016 | } | 
| 4017 |  | 
| 4018 | /*! | 
| 4019 |     \fn bool QRegExp::operator!=(const QRegExp &rx) const | 
| 4020 |  | 
| 4021 |     Returns \c true if this regular expression is not equal to \a rx; | 
| 4022 |     otherwise returns \c false. | 
| 4023 |  | 
| 4024 |     \sa operator==() | 
| 4025 | */ | 
| 4026 |  | 
| 4027 | /*! | 
| 4028 |     Returns \c true if the pattern string is empty; otherwise returns | 
| 4029 |     false. | 
| 4030 |  | 
| 4031 |     If you call exactMatch() with an empty pattern on an empty string | 
| 4032 |     it will return true; otherwise it returns \c false since it operates | 
| 4033 |     over the whole string. If you call indexIn() with an empty pattern | 
| 4034 |     on \e any string it will return the start offset (0 by default) | 
| 4035 |     because the empty pattern matches the 'emptiness' at the start of | 
| 4036 |     the string. In this case the length of the match returned by | 
| 4037 |     matchedLength() will be 0. | 
| 4038 |  | 
| 4039 |     See QString::isEmpty(). | 
| 4040 | */ | 
| 4041 |  | 
| 4042 | bool QRegExp::isEmpty() const | 
| 4043 | { | 
| 4044 |     return priv->engineKey.pattern.isEmpty(); | 
| 4045 | } | 
| 4046 |  | 
| 4047 | /*! | 
| 4048 |     Returns \c true if the regular expression is valid; otherwise returns | 
| 4049 |     false. An invalid regular expression never matches. | 
| 4050 |  | 
| 4051 |     The pattern \b{[a-z} is an example of an invalid pattern, since | 
| 4052 |     it lacks a closing square bracket. | 
| 4053 |  | 
| 4054 |     Note that the validity of a regexp may also depend on the setting | 
| 4055 |     of the wildcard flag, for example \b{*.html} is a valid | 
| 4056 |     wildcard regexp but an invalid full regexp. | 
| 4057 |  | 
| 4058 |     \sa errorString() | 
| 4059 | */ | 
| 4060 | bool QRegExp::isValid() const | 
| 4061 | { | 
| 4062 |     if (priv->engineKey.pattern.isEmpty()) { | 
| 4063 |         return true; | 
| 4064 |     } else { | 
| 4065 |         prepareEngine(priv); | 
| 4066 |         return priv->eng->isValid(); | 
| 4067 |     } | 
| 4068 | } | 
| 4069 |  | 
| 4070 | /*! | 
| 4071 |     Returns the pattern string of the regular expression. The pattern | 
| 4072 |     has either regular expression syntax or wildcard syntax, depending | 
| 4073 |     on patternSyntax(). | 
| 4074 |  | 
| 4075 |     \sa patternSyntax(), caseSensitivity() | 
| 4076 | */ | 
| 4077 | QString QRegExp::pattern() const | 
| 4078 | { | 
| 4079 |     return priv->engineKey.pattern; | 
| 4080 | } | 
| 4081 |  | 
| 4082 | /*! | 
| 4083 |     Sets the pattern string to \a pattern. The case sensitivity, | 
| 4084 |     wildcard, and minimal matching options are not changed. | 
| 4085 |  | 
| 4086 |     \sa setPatternSyntax(), setCaseSensitivity() | 
| 4087 | */ | 
| 4088 | void QRegExp::setPattern(const QString &pattern) | 
| 4089 | { | 
| 4090 |     if (priv->engineKey.pattern != pattern) { | 
| 4091 |         invalidateEngine(priv); | 
| 4092 |         priv->engineKey.pattern = pattern; | 
| 4093 |     } | 
| 4094 | } | 
| 4095 |  | 
| 4096 | /*! | 
| 4097 |     Returns Qt::CaseSensitive if the regexp is matched case | 
| 4098 |     sensitively; otherwise returns Qt::CaseInsensitive. | 
| 4099 |  | 
| 4100 |     \sa patternSyntax(), pattern(), isMinimal() | 
| 4101 | */ | 
| 4102 | Qt::CaseSensitivity QRegExp::caseSensitivity() const | 
| 4103 | { | 
| 4104 |     return priv->engineKey.cs; | 
| 4105 | } | 
| 4106 |  | 
| 4107 | /*! | 
| 4108 |     Sets case sensitive matching to \a cs. | 
| 4109 |  | 
| 4110 |     If \a cs is Qt::CaseSensitive, \b{\\.txt$} matches | 
| 4111 |     \c{readme.txt} but not \c{README.TXT}. | 
| 4112 |  | 
| 4113 |     \sa setPatternSyntax(), setPattern(), setMinimal() | 
| 4114 | */ | 
| 4115 | void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs) | 
| 4116 | { | 
| 4117 |     if ((bool)cs != (bool)priv->engineKey.cs) { | 
| 4118 |         invalidateEngine(priv); | 
| 4119 |         priv->engineKey.cs = cs; | 
| 4120 |     } | 
| 4121 | } | 
| 4122 |  | 
| 4123 | /*! | 
| 4124 |     Returns the syntax used by the regular expression. The default is | 
| 4125 |     QRegExp::RegExp. | 
| 4126 |  | 
| 4127 |     \sa pattern(), caseSensitivity() | 
| 4128 | */ | 
| 4129 | QRegExp::PatternSyntax QRegExp::patternSyntax() const | 
| 4130 | { | 
| 4131 |     return priv->engineKey.patternSyntax; | 
| 4132 | } | 
| 4133 |  | 
| 4134 | /*! | 
| 4135 |     Sets the syntax mode for the regular expression. The default is | 
| 4136 |     QRegExp::RegExp. | 
| 4137 |  | 
| 4138 |     Setting \a syntax to QRegExp::Wildcard enables simple shell-like | 
| 4139 |     \l{QRegExp wildcard matching}. For example, \b{r*.txt} matches the | 
| 4140 |     string \c{readme.txt} in wildcard mode, but does not match | 
| 4141 |     \c{readme}. | 
| 4142 |  | 
| 4143 |     Setting \a syntax to QRegExp::FixedString means that the pattern | 
| 4144 |     is interpreted as a plain string. Special characters (e.g., | 
| 4145 |     backslash) don't need to be escaped then. | 
| 4146 |  | 
| 4147 |     \sa setPattern(), setCaseSensitivity(), escape() | 
| 4148 | */ | 
| 4149 | void QRegExp::setPatternSyntax(PatternSyntax syntax) | 
| 4150 | { | 
| 4151 |     if (syntax != priv->engineKey.patternSyntax) { | 
| 4152 |         invalidateEngine(priv); | 
| 4153 |         priv->engineKey.patternSyntax = syntax; | 
| 4154 |     } | 
| 4155 | } | 
| 4156 |  | 
| 4157 | /*! | 
| 4158 |     Returns \c true if minimal (non-greedy) matching is enabled; | 
| 4159 |     otherwise returns \c false. | 
| 4160 |  | 
| 4161 |     \sa caseSensitivity(), setMinimal() | 
| 4162 | */ | 
| 4163 | bool QRegExp::isMinimal() const | 
| 4164 | { | 
| 4165 |     return priv->minimal; | 
| 4166 | } | 
| 4167 |  | 
| 4168 | /*! | 
| 4169 |     Enables or disables minimal matching. If \a minimal is false, | 
| 4170 |     matching is greedy (maximal) which is the default. | 
| 4171 |  | 
| 4172 |     For example, suppose we have the input string "We must be | 
| 4173 |     <b>bold</b>, very <b>bold</b>!" and the pattern | 
| 4174 |     \b{<b>.*</b>}. With the default greedy (maximal) matching, | 
| 4175 |     the match is "We must be \underline{<b>bold</b>, very | 
| 4176 |     <b>bold</b>}!". But with minimal (non-greedy) matching, the | 
| 4177 |     first match is: "We must be \underline{<b>bold</b>}, very | 
| 4178 |     <b>bold</b>!" and the second match is "We must be <b>bold</b>, | 
| 4179 |     very \underline{<b>bold</b>}!". In practice we might use the pattern | 
| 4180 |     \b{<b>[^<]*\</b>} instead, although this will still fail for | 
| 4181 |     nested tags. | 
| 4182 |  | 
| 4183 |     \sa setCaseSensitivity() | 
| 4184 | */ | 
| 4185 | void QRegExp::setMinimal(bool minimal) | 
| 4186 | { | 
| 4187 |     priv->minimal = minimal; | 
| 4188 | } | 
| 4189 |  | 
| 4190 | // ### Qt 5: make non-const | 
| 4191 | /*! | 
| 4192 |     Returns \c true if \a str is matched exactly by this regular | 
| 4193 |     expression; otherwise returns \c false. You can determine how much of | 
| 4194 |     the string was matched by calling matchedLength(). | 
| 4195 |  | 
| 4196 |     For a given regexp string R, exactMatch("R") is the equivalent of | 
| 4197 |     indexIn("^R$") since exactMatch() effectively encloses the regexp | 
| 4198 |     in the start of string and end of string anchors, except that it | 
| 4199 |     sets matchedLength() differently. | 
| 4200 |  | 
| 4201 |     For example, if the regular expression is \b{blue}, then | 
| 4202 |     exactMatch() returns \c true only for input \c blue. For inputs \c | 
| 4203 |     bluebell, \c blutak and \c lightblue, exactMatch() returns \c false | 
| 4204 |     and matchedLength() will return 4, 3 and 0 respectively. | 
| 4205 |  | 
| 4206 |     Although const, this function sets matchedLength(), | 
| 4207 |     capturedTexts(), and pos(). | 
| 4208 |  | 
| 4209 |     \sa indexIn(), lastIndexIn() | 
| 4210 | */ | 
| 4211 | bool QRegExp::exactMatch(const QString &str) const | 
| 4212 | { | 
| 4213 |     prepareEngineForMatch(priv, str); | 
| 4214 |     priv->matchState.match(str0: str.unicode(), len0: str.size(), pos0: 0, minimal0: priv->minimal, oneTest: true, caretIndex: 0); | 
| 4215 |     if (priv->matchState.captured[1] == str.size()) { | 
| 4216 |         return true; | 
| 4217 |     } else { | 
| 4218 |         priv->matchState.captured[0] = 0; | 
| 4219 |         priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen; | 
| 4220 |         return false; | 
| 4221 |     } | 
| 4222 | } | 
| 4223 |  | 
| 4224 | /*! | 
| 4225 |    Returns the regexp as a QVariant | 
| 4226 | */ | 
| 4227 | QRegExp::operator QVariant() const | 
| 4228 | { | 
| 4229 | QT_WARNING_PUSH QT_WARNING_DISABLE_DEPRECATED | 
| 4230 |     QVariant v; | 
| 4231 |     v.setValue(*this); | 
| 4232 |     return v; | 
| 4233 | QT_WARNING_POP | 
| 4234 | } | 
| 4235 |  | 
| 4236 | // ### Qt 5: make non-const | 
| 4237 | /*! | 
| 4238 |     Attempts to find a match in \a str from position \a offset (0 by | 
| 4239 |     default). If \a offset is -1, the search starts at the last | 
| 4240 |     character; if -2, at the next to last character; etc. | 
| 4241 |  | 
| 4242 |     Returns the position of the first match, or -1 if there was no | 
| 4243 |     match. | 
| 4244 |  | 
| 4245 |     The \a caretMode parameter can be used to instruct whether \b{^} | 
| 4246 |     should match at index 0 or at \a offset. | 
| 4247 |  | 
| 4248 |     You might prefer to use QString::indexOf(), QString::contains(), | 
| 4249 |     or even QStringList::filter(). To replace matches use | 
| 4250 |     QString::replace(). | 
| 4251 |  | 
| 4252 |     Example: | 
| 4253 |     \snippet code/src_corelib_text_qregexp.cpp 13 | 
| 4254 |  | 
| 4255 |     Although const, this function sets matchedLength(), | 
| 4256 |     capturedTexts() and pos(). | 
| 4257 |  | 
    If the QRegExp is a wildcard expression (see setPatternSyntax())
    and you want to test a string against the whole wildcard expression,
    use exactMatch() instead of this function.
| 4261 |  | 
| 4262 |     \sa lastIndexIn(), exactMatch() | 
| 4263 | */ | 
| 4264 |  | 
| 4265 | int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const | 
| 4266 | { | 
| 4267 |     prepareEngineForMatch(priv, str); | 
| 4268 |     if (offset < 0) | 
| 4269 |         offset += str.size(); | 
| 4270 |     priv->matchState.match(str0: str.unicode(), len0: str.size(), pos0: offset, | 
| 4271 |         minimal0: priv->minimal, oneTest: false, caretIndex: caretIndex(offset, caretMode)); | 
| 4272 |     return priv->matchState.captured[0]; | 
| 4273 | } | 
| 4274 |  | 
| 4275 | // ### Qt 5: make non-const | 
| 4276 | /*! | 
| 4277 |     Attempts to find a match backwards in \a str from position \a | 
| 4278 |     offset. If \a offset is -1 (the default), the search starts at the | 
| 4279 |     last character; if -2, at the next to last character; etc. | 
| 4280 |  | 
| 4281 |     Returns the position of the first match, or -1 if there was no | 
| 4282 |     match. | 
| 4283 |  | 
| 4284 |     The \a caretMode parameter can be used to instruct whether \b{^} | 
| 4285 |     should match at index 0 or at \a offset. | 
| 4286 |  | 
| 4287 |     Although const, this function sets matchedLength(), | 
| 4288 |     capturedTexts() and pos(). | 
| 4289 |  | 
| 4290 |     \warning Searching backwards is much slower than searching | 
| 4291 |     forwards. | 
| 4292 |  | 
| 4293 |     \sa indexIn(), exactMatch() | 
| 4294 | */ | 
| 4295 |  | 
| 4296 | int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const | 
| 4297 | { | 
| 4298 |     prepareEngineForMatch(priv, str); | 
| 4299 |     if (offset < 0) | 
| 4300 |         offset += str.size(); | 
| 4301 |     if (offset < 0 || offset > str.size()) { | 
| 4302 |         memset(s: priv->matchState.captured, c: -1, n: priv->matchState.capturedSize*sizeof(int)); | 
| 4303 |         return -1; | 
| 4304 |     } | 
| 4305 |  | 
| 4306 |     while (offset >= 0) { | 
| 4307 |         priv->matchState.match(str0: str.unicode(), len0: str.size(), pos0: offset, | 
| 4308 |             minimal0: priv->minimal, oneTest: true, caretIndex: caretIndex(offset, caretMode)); | 
| 4309 |         if (priv->matchState.captured[0] == offset) | 
| 4310 |             return offset; | 
| 4311 |         --offset; | 
| 4312 |     } | 
| 4313 |     return -1; | 
| 4314 | } | 
| 4315 |  | 
| 4316 | /*! | 
| 4317 |     Returns the length of the last matched string, or -1 if there was | 
| 4318 |     no match. | 
| 4319 |  | 
| 4320 |     \sa exactMatch(), indexIn(), lastIndexIn() | 
| 4321 | */ | 
| 4322 | int QRegExp::matchedLength() const | 
| 4323 | { | 
| 4324 |     return priv->matchState.captured[1]; | 
| 4325 | } | 
| 4326 |  | 
| 4327 |  | 
| 4328 | /*! | 
| 4329 |   Replaces every occurrence of this regular expression in | 
| 4330 |   \a str with \a after and returns the result. | 
| 4331 |  | 
| 4332 |   For regular expressions containing \l{capturing parentheses}, | 
| 4333 |   occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced | 
| 4334 |   with \c {rx}.cap(1), cap(2), ... | 
| 4335 |  | 
| 4336 |   \sa indexIn(), lastIndexIn(), QRegExp::cap() | 
| 4337 | */ | 
| 4338 | QString QRegExp::replaceIn(const QString &str, const QString &after) const | 
| 4339 | { | 
| 4340 |     struct QStringCapture | 
| 4341 |     { | 
| 4342 |         int pos; | 
| 4343 |         int len; | 
| 4344 |         int no; | 
| 4345 |     }; | 
| 4346 |  | 
| 4347 |     QRegExp rx2(*this); | 
| 4348 |  | 
| 4349 |     if (str.isEmpty() && rx2.indexIn(str) == -1) | 
| 4350 |         return str; | 
| 4351 |  | 
| 4352 |     QString s(str); | 
| 4353 |  | 
| 4354 |     int index = 0; | 
| 4355 |     int numCaptures = rx2.captureCount(); | 
| 4356 |     int al = after.size(); | 
| 4357 |     QRegExp::CaretMode caretMode = QRegExp::CaretAtZero; | 
| 4358 |  | 
| 4359 |     if (numCaptures > 0) { | 
| 4360 |         const QChar *uc = after.unicode(); | 
| 4361 |         int numBackRefs = 0; | 
| 4362 |  | 
| 4363 |         for (int i = 0; i < al - 1; i++) { | 
| 4364 |             if (uc[i] == QLatin1Char('\\')) { | 
| 4365 |                 int no = uc[i + 1].digitValue(); | 
| 4366 |                 if (no > 0 && no <= numCaptures) | 
| 4367 |                     numBackRefs++; | 
| 4368 |             } | 
| 4369 |         } | 
| 4370 |  | 
| 4371 |         /* | 
| 4372 |             This is the harder case where we have back-references. | 
| 4373 |         */ | 
| 4374 |         if (numBackRefs > 0) { | 
| 4375 |             QVarLengthArray<QStringCapture, 16> captures(numBackRefs); | 
| 4376 |             int j = 0; | 
| 4377 |  | 
| 4378 |             for (int i = 0; i < al - 1; i++) { | 
| 4379 |                 if (uc[i] == QLatin1Char('\\')) { | 
| 4380 |                     int no = uc[i + 1].digitValue(); | 
| 4381 |                     if (no > 0 && no <= numCaptures) { | 
| 4382 |                         QStringCapture capture; | 
| 4383 |                         capture.pos = i; | 
| 4384 |                         capture.len = 2; | 
| 4385 |  | 
| 4386 |                         if (i < al - 2) { | 
| 4387 |                             int secondDigit = uc[i + 2].digitValue(); | 
| 4388 |                             if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) { | 
| 4389 |                                 no = (no * 10) + secondDigit; | 
| 4390 |                                 ++capture.len; | 
| 4391 |                             } | 
| 4392 |                         } | 
| 4393 |  | 
| 4394 |                         capture.no = no; | 
| 4395 |                         captures[j++] = capture; | 
| 4396 |                     } | 
| 4397 |                 } | 
| 4398 |             } | 
| 4399 |  | 
| 4400 |             while (index <= s.size()) { | 
| 4401 |                 index = rx2.indexIn(str: s, offset: index, caretMode); | 
| 4402 |                 if (index == -1) | 
| 4403 |                     break; | 
| 4404 |  | 
| 4405 |                 QString after2(after); | 
| 4406 |                 for (j = numBackRefs - 1; j >= 0; j--) { | 
| 4407 |                     const QStringCapture &capture = captures[j]; | 
| 4408 |                     after2.replace(i: capture.pos, len: capture.len, after: rx2.cap(nth: capture.no)); | 
| 4409 |                 } | 
| 4410 |  | 
| 4411 |                 s.replace(i: index, len: rx2.matchedLength(), after: after2); | 
| 4412 |                 index += after2.size(); | 
| 4413 |  | 
| 4414 |                 // avoid infinite loop on 0-length matches (e.g., QRegExp("[a-z]*")) | 
| 4415 |                 if (rx2.matchedLength() == 0) | 
| 4416 |                     ++index; | 
| 4417 |  | 
| 4418 |                 caretMode = QRegExp::CaretWontMatch; | 
| 4419 |             } | 
| 4420 |             return s; | 
| 4421 |         } | 
| 4422 |     } | 
| 4423 |  | 
| 4424 |     /* | 
| 4425 |         This is the simple and optimized case where we don't have | 
| 4426 |         back-references. | 
| 4427 |     */ | 
| 4428 |     while (index != -1) { | 
| 4429 |         struct { | 
| 4430 |             int pos; | 
| 4431 |             int length; | 
| 4432 |         } replacements[2048]; | 
| 4433 |  | 
| 4434 |         int pos = 0; | 
| 4435 |         int adjust = 0; | 
| 4436 |         while (pos < 2047) { | 
| 4437 |             index = rx2.indexIn(str: s, offset: index, caretMode); | 
| 4438 |             if (index == -1) | 
| 4439 |                 break; | 
| 4440 |             int ml = rx2.matchedLength(); | 
| 4441 |             replacements[pos].pos = index; | 
| 4442 |             replacements[pos++].length = ml; | 
| 4443 |             index += ml; | 
| 4444 |             adjust += al - ml; | 
| 4445 |             // avoid infinite loop | 
| 4446 |             if (!ml) | 
| 4447 |                 index++; | 
| 4448 |         } | 
| 4449 |         if (!pos) | 
| 4450 |             break; | 
| 4451 |         replacements[pos].pos = s.size(); | 
| 4452 |         int newlen = s.size() + adjust; | 
| 4453 |  | 
| 4454 |         // to continue searching at the right position after we did | 
| 4455 |         // the first round of replacements | 
| 4456 |         if (index != -1) | 
| 4457 |             index += adjust; | 
| 4458 |         QString newstring; | 
| 4459 |         newstring.reserve(asize: newlen + 1); | 
| 4460 |         QChar *newuc = newstring.data(); | 
| 4461 |         QChar *uc = newuc; | 
| 4462 |         int copystart = 0; | 
| 4463 |         int i = 0; | 
| 4464 |         while (i < pos) { | 
| 4465 |             int copyend = replacements[i].pos; | 
| 4466 |             int size = copyend - copystart; | 
| 4467 |             memcpy(dest: static_cast<void*>(uc), src: static_cast<const void *>(s.constData() + copystart), n: size * sizeof(QChar)); | 
| 4468 |             uc += size; | 
| 4469 |             memcpy(dest: static_cast<void *>(uc), src: static_cast<const void *>(after.constData()), n: al * sizeof(QChar)); | 
| 4470 |             uc += al; | 
| 4471 |             copystart = copyend + replacements[i].length; | 
| 4472 |             i++; | 
| 4473 |         } | 
| 4474 |         memcpy(dest: static_cast<void *>(uc), src: static_cast<const void *>(s.constData() + copystart), n: (s.size() - copystart) * sizeof(QChar)); | 
| 4475 |         newstring.resize(size: newlen); | 
| 4476 |         s = newstring; | 
| 4477 |         caretMode = QRegExp::CaretWontMatch; | 
| 4478 |     } | 
| 4479 |     return s; | 
| 4480 |  | 
| 4481 | } | 
| 4482 |  | 
| 4483 |  | 
| 4484 | /*! | 
| 4485 |   \fn QString QRegExp::removeIn(const QString &str) const | 
| 4486 |  | 
  Removes every occurrence of this regular expression in \a str, and
  returns the result.
| 4489 |  | 
| 4490 |   Does the same as replaceIn(str, QString()). | 
| 4491 |  | 
| 4492 |   \sa indexIn(), lastIndexIn(), replaceIn() | 
| 4493 | */ | 
| 4494 |  | 
| 4495 |  | 
| 4496 | /*! | 
  \fn int QRegExp::countIn(const QString &str) const
| 4498 |  | 
| 4499 |    Returns the number of times this regular expression matches | 
| 4500 |    in \a str. | 
| 4501 |  | 
| 4502 |   \sa indexIn(), lastIndexIn(), replaceIn() | 
| 4503 | */ | 
| 4504 |  | 
| 4505 | int QRegExp::countIn(const QString &str) const | 
| 4506 | { | 
| 4507 |     QRegExp rx2(*this); | 
| 4508 |     int count = 0; | 
| 4509 |     int index = -1; | 
| 4510 |     int len = str.size(); | 
| 4511 |     while (index < len - 1) {                 // count overlapping matches | 
| 4512 |         index = rx2.indexIn(str, offset: index + 1); | 
| 4513 |         if (index == -1) | 
| 4514 |             break; | 
| 4515 |         count++; | 
| 4516 |     } | 
| 4517 |     return count; | 
| 4518 | } | 
| 4519 |  | 
| 4520 | /*! | 
| 4521 |     Splits \a str into substrings wherever this regular expression | 
| 4522 |     matches, and returns the list of those strings. If this regular | 
| 4523 |     expression does not match anywhere in the string, split() returns a | 
| 4524 |     single-element list containing \a str. | 
| 4525 |  | 
| 4526 |     If \a behavior is set to Qt::KeepEmptyParts, empty fields are | 
| 4527 |     included in the resulting list. | 
| 4528 |  | 
| 4529 |     \sa QStringList::join(), QString::split() | 
| 4530 | */ | 
| 4531 | QStringList QRegExp::splitString(const QString &str, Qt::SplitBehavior behavior) const | 
| 4532 | { | 
| 4533 |     QRegExp rx2(*this); | 
| 4534 |     QStringList list; | 
| 4535 |     int start = 0; | 
| 4536 |     int  = 0; | 
| 4537 |     int end; | 
| 4538 |     while ((end = rx2.indexIn(str, offset: start + extra)) != -1) { | 
| 4539 |         int matchedLen = rx2.matchedLength(); | 
| 4540 |         if (start != end || behavior == Qt::KeepEmptyParts) | 
| 4541 |             list.append(t: str.mid(position: start, n: end - start)); | 
| 4542 |         start = end + matchedLen; | 
| 4543 |         extra = (matchedLen == 0) ? 1 : 0; | 
| 4544 |     } | 
| 4545 |     if (start != str.size() || behavior == Qt::KeepEmptyParts) | 
| 4546 |         list.append(t: str.mid(position: start, n: -1)); | 
| 4547 |     return list; | 
| 4548 | } | 
| 4549 |  | 
| 4550 | /*! | 
| 4551 |     Returns a list of all the strings that match this regular | 
| 4552 |     expression in \a stringList. | 
| 4553 | */ | 
| 4554 | QStringList QRegExp::filterList(const QStringList &stringList) const | 
| 4555 | { | 
| 4556 |     QStringList res; | 
| 4557 |     for (const QString &s : stringList) { | 
| 4558 |         if (containedIn(str: s)) | 
| 4559 |             res << s; | 
| 4560 |     } | 
| 4561 |     return res; | 
| 4562 | } | 
| 4563 |  | 
| 4564 | /*! | 
    Replaces every occurrence of this regexp in each of the strings in
    \a stringList with \a after. Returns the resulting string list.
| 4567 | */ | 
| 4568 | QStringList QRegExp::replaceIn(const QStringList &stringList, const QString &after) const | 
| 4569 | { | 
| 4570 |     QStringList list; | 
| 4571 |     for (const QString &s : stringList) | 
| 4572 |         list << replaceIn(str: s, after); | 
| 4573 |     return list; | 
| 4574 | } | 
| 4575 |  | 
| 4576 | /*! | 
| 4577 |     Returns the index position of the first exact match of this regexp in | 
| 4578 |     \a list, searching forward from index position \a from. Returns | 
| 4579 |     -1 if no item matched. | 
| 4580 |  | 
| 4581 |     \sa lastIndexIn(), exactMatch() | 
| 4582 | */ | 
| 4583 | int QRegExp::indexIn(const QStringList &list, int from) const | 
| 4584 | { | 
| 4585 |     QRegExp rx2(*this); | 
| 4586 |     if (from < 0) | 
| 4587 |         from = qMax(a: from + list.size(), b: 0); | 
| 4588 |     for (int i = from; i < list.size(); ++i) { | 
| 4589 |         if (rx2.exactMatch(str: list.at(i))) | 
| 4590 |            return i; | 
| 4591 |     } | 
| 4592 |     return -1; | 
| 4593 | } | 
| 4594 |  | 
| 4595 | /*! | 
| 4596 |     Returns the index position of the last exact match of this regexp in | 
| 4597 |     \a list, searching backward from index position \a from. If \a | 
| 4598 |     from is -1 (the default), the search starts at the last item. | 
| 4599 |     Returns -1 if no item matched. | 
| 4600 |  | 
| 4601 |     \sa QRegExp::exactMatch() | 
| 4602 | */ | 
| 4603 | int QRegExp::lastIndexIn(const QStringList &list, int from) const | 
| 4604 | { | 
| 4605 |     QRegExp rx2(*this); | 
| 4606 |     if (from < 0) | 
| 4607 |         from += list.size(); | 
| 4608 |     else if (from >= list.size()) | 
| 4609 |         from = list.size() - 1; | 
| 4610 |     for (int i = from; i >= 0; --i) { | 
| 4611 |         if (rx2.exactMatch(str: list.at(i))) | 
| 4612 |             return i; | 
| 4613 |     } | 
| 4614 |     return -1; | 
| 4615 | } | 
| 4616 |  | 
| 4617 | #ifndef QT_NO_REGEXP_CAPTURE | 
| 4618 |  | 
| 4619 | /*! | 
| 4620 |   \since 4.6 | 
| 4621 |   Returns the number of captures contained in the regular expression. | 
| 4622 |  */ | 
| 4623 | int QRegExp::captureCount() const | 
| 4624 | { | 
| 4625 |     prepareEngine(priv); | 
| 4626 |     return priv->eng->captureCount(); | 
| 4627 | } | 
| 4628 |  | 
| 4629 | /*! | 
| 4630 |     Returns a list of the captured text strings. | 
| 4631 |  | 
| 4632 |     The first string in the list is the entire matched string. Each | 
| 4633 |     subsequent list element contains a string that matched a | 
| 4634 |     (capturing) subexpression of the regexp. | 
| 4635 |  | 
| 4636 |     For example: | 
| 4637 |     \snippet code/src_corelib_text_qregexp.cpp 14 | 
| 4638 |  | 
| 4639 |     The above example also captures elements that may be present but | 
| 4640 |     which we have no interest in. This problem can be solved by using | 
| 4641 |     non-capturing parentheses: | 
| 4642 |  | 
| 4643 |     \snippet code/src_corelib_text_qregexp.cpp 15 | 
| 4644 |  | 
| 4645 |     Note that if you want to iterate over the list, you should iterate | 
| 4646 |     over a copy, e.g. | 
| 4647 |     \snippet code/src_corelib_text_qregexp.cpp 16 | 
| 4648 |  | 
| 4649 |     Some regexps can match an indeterminate number of times. For | 
| 4650 |     example if the input string is "Offsets: 12 14 99 231 7" and the | 
| 4651 |     regexp, \c{rx}, is \b{(\\d+)+}, we would hope to get a list of | 
| 4652 |     all the numbers matched. However, after calling | 
| 4653 |     \c{rx.indexIn(str)}, capturedTexts() will return the list ("12", | 
| 4654 |     "12"), i.e. the entire match was "12" and the first subexpression | 
| 4655 |     matched was "12". The correct approach is to use cap() in a | 
| 4656 |     \l{QRegExp#cap_in_a_loop}{loop}. | 
| 4657 |  | 
| 4658 |     The order of elements in the string list is as follows. The first | 
| 4659 |     element is the entire matching string. Each subsequent element | 
| 4660 |     corresponds to the next capturing open left parentheses. Thus | 
| 4661 |     capturedTexts()[1] is the text of the first capturing parentheses, | 
| 4662 |     capturedTexts()[2] is the text of the second and so on | 
| 4663 |     (corresponding to $1, $2, etc., in some other regexp languages). | 
| 4664 |  | 
| 4665 |     \sa cap(), pos() | 
| 4666 | */ | 
| 4667 | QStringList QRegExp::capturedTexts() const | 
| 4668 | { | 
| 4669 |     if (priv->capturedCache.isEmpty()) { | 
| 4670 |         prepareEngine(priv); | 
| 4671 |         const int *captured = priv->matchState.captured; | 
| 4672 |         int n = priv->matchState.capturedSize; | 
| 4673 |  | 
| 4674 |         for (int i = 0; i < n; i += 2) { | 
| 4675 |             QString m; | 
| 4676 |             if (captured[i + 1] == 0) | 
| 4677 |                 m = QLatin1String("" ); // ### Qt 5: don't distinguish between null and empty | 
| 4678 |             else if (captured[i] >= 0) | 
| 4679 |                 m = priv->t.mid(position: captured[i], n: captured[i + 1]); | 
| 4680 |             priv->capturedCache.append(t: m); | 
| 4681 |         } | 
| 4682 |         priv->t.clear(); | 
| 4683 |     } | 
| 4684 |     return priv->capturedCache; | 
| 4685 | } | 
| 4686 |  | 
| 4687 | /*! | 
| 4688 |     \internal | 
| 4689 | */ | 
| 4690 | QStringList QRegExp::capturedTexts() | 
| 4691 | { | 
| 4692 |     return const_cast<const QRegExp *>(this)->capturedTexts(); | 
| 4693 | } | 
| 4694 |  | 
| 4695 | /*! | 
| 4696 |     Returns the text captured by the \a nth subexpression. The entire | 
| 4697 |     match has index 0 and the parenthesized subexpressions have | 
| 4698 |     indexes starting from 1 (excluding non-capturing parentheses). | 
| 4699 |  | 
| 4700 |     \snippet code/src_corelib_text_qregexp.cpp 17 | 
| 4701 |  | 
| 4702 |     The order of elements matched by cap() is as follows. The first | 
| 4703 |     element, cap(0), is the entire matching string. Each subsequent | 
| 4704 |     element corresponds to the next capturing open left parentheses. | 
| 4705 |     Thus cap(1) is the text of the first capturing parentheses, cap(2) | 
| 4706 |     is the text of the second, and so on. | 
| 4707 |  | 
| 4708 |     \sa capturedTexts(), pos() | 
| 4709 | */ | 
| 4710 | QString QRegExp::cap(int nth) const | 
| 4711 | { | 
| 4712 |     return capturedTexts().value(i: nth); | 
| 4713 | } | 
| 4714 |  | 
| 4715 | /*! | 
| 4716 |     \internal | 
| 4717 | */ | 
| 4718 | QString QRegExp::cap(int nth) | 
| 4719 | { | 
| 4720 |     return const_cast<const QRegExp *>(this)->cap(nth); | 
| 4721 | } | 
| 4722 |  | 
| 4723 | /*! | 
| 4724 |     Returns the position of the \a nth captured text in the searched | 
| 4725 |     string. If \a nth is 0 (the default), pos() returns the position | 
| 4726 |     of the whole match. | 
| 4727 |  | 
| 4728 |     Example: | 
| 4729 |     \snippet code/src_corelib_text_qregexp.cpp 18 | 
| 4730 |  | 
| 4731 |     For zero-length matches, pos() always returns -1. (For example, if | 
| 4732 |     cap(4) would return an empty string, pos(4) returns -1.) This is | 
| 4733 |     a feature of the implementation. | 
| 4734 |  | 
| 4735 |     \sa cap(), capturedTexts() | 
| 4736 | */ | 
| 4737 | int QRegExp::pos(int nth) const | 
| 4738 | { | 
| 4739 |     if (nth < 0 || nth >= priv->matchState.capturedSize / 2) | 
| 4740 |         return -1; | 
| 4741 |     else | 
| 4742 |         return priv->matchState.captured[2 * nth]; | 
| 4743 | } | 
| 4744 |  | 
| 4745 | /*! | 
| 4746 |     \internal | 
| 4747 | */ | 
| 4748 | int QRegExp::pos(int nth) | 
| 4749 | { | 
| 4750 |     return const_cast<const QRegExp *>(this)->pos(nth); | 
| 4751 | } | 
| 4752 |  | 
| 4753 | /*! | 
| 4754 |   Returns a text string that explains why a regexp pattern is | 
| 4755 |   invalid the case being; otherwise returns "no error occurred". | 
| 4756 |  | 
| 4757 |   \sa isValid() | 
| 4758 | */ | 
| 4759 | QString QRegExp::errorString() const | 
| 4760 | { | 
| 4761 |     if (isValid()) { | 
| 4762 |         return QString::fromLatin1(RXERR_OK); | 
| 4763 |     } else { | 
| 4764 |         return priv->eng->errorString(); | 
| 4765 |     } | 
| 4766 | } | 
| 4767 |  | 
| 4768 | /*! | 
| 4769 |     \internal | 
| 4770 | */ | 
| 4771 | QString QRegExp::errorString() | 
| 4772 | { | 
| 4773 |     return const_cast<const QRegExp *>(this)->errorString(); | 
| 4774 | } | 
| 4775 |  | 
| 4776 | #endif | 
| 4777 |  | 
| 4778 | /*! | 
| 4779 |     Returns the string \a str with every regexp special character | 
| 4780 |     escaped with a backslash. The special characters are $, (,), *, +, | 
| 4781 |     ., ?, [, \,], ^, {, | and }. | 
| 4782 |  | 
| 4783 |     Example: | 
| 4784 |  | 
| 4785 |     \snippet code/src_corelib_text_qregexp.cpp 19 | 
| 4786 |  | 
| 4787 |     This function is useful to construct regexp patterns dynamically: | 
| 4788 |  | 
| 4789 |     \snippet code/src_corelib_text_qregexp.cpp 20 | 
| 4790 |  | 
| 4791 |     \sa setPatternSyntax() | 
| 4792 | */ | 
| 4793 | QString QRegExp::escape(const QString &str) | 
| 4794 | { | 
| 4795 |     QString quoted; | 
| 4796 |     const int count = str.size(); | 
| 4797 |     quoted.reserve(asize: count * 2); | 
| 4798 |     const QLatin1Char backslash('\\'); | 
| 4799 |     for (int i = 0; i < count; i++) { | 
| 4800 |         switch (str.at(i).toLatin1()) { | 
| 4801 |         case '$': | 
| 4802 |         case '(': | 
| 4803 |         case ')': | 
| 4804 |         case '*': | 
| 4805 |         case '+': | 
| 4806 |         case '.': | 
| 4807 |         case '?': | 
| 4808 |         case '[': | 
| 4809 |         case '\\': | 
| 4810 |         case ']': | 
| 4811 |         case '^': | 
| 4812 |         case '{': | 
| 4813 |         case '|': | 
| 4814 |         case '}': | 
| 4815 |             quoted.append(c: backslash); | 
| 4816 |         } | 
| 4817 |         quoted.append(c: str.at(i)); | 
| 4818 |     } | 
| 4819 |     return quoted; | 
| 4820 | } | 
| 4821 |  | 
| 4822 |  | 
| 4823 | #ifndef QT_NO_DATASTREAM | 
| 4824 | /*! | 
| 4825 |     \relates QRegExp | 
| 4826 |  | 
| 4827 |     Writes the regular expression \a regExp to stream \a out. | 
| 4828 |  | 
| 4829 |     \sa {Serializing Qt Data Types} | 
| 4830 | */ | 
| 4831 | QDataStream &operator<<(QDataStream &out, const QRegExp ®Exp) | 
| 4832 | { | 
| 4833 |     return out << regExp.pattern() << (quint8)regExp.caseSensitivity() | 
| 4834 |                << (quint8)regExp.patternSyntax() | 
| 4835 |                << (quint8)!!regExp.isMinimal(); | 
| 4836 | } | 
| 4837 |  | 
| 4838 | /*! | 
| 4839 |     \relates QRegExp | 
| 4840 |  | 
| 4841 |     Reads a regular expression from stream \a in into \a regExp. | 
| 4842 |  | 
| 4843 |     \sa {Serializing Qt Data Types} | 
| 4844 | */ | 
| 4845 | QDataStream &operator>>(QDataStream &in, QRegExp ®Exp) | 
| 4846 | { | 
| 4847 |     QString pattern; | 
| 4848 |     quint8 cs; | 
| 4849 |     quint8 patternSyntax; | 
| 4850 |     quint8 isMinimal; | 
| 4851 |  | 
| 4852 |     in >> pattern >> cs >> patternSyntax >> isMinimal; | 
| 4853 |  | 
| 4854 |     QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs), | 
| 4855 |                       QRegExp::PatternSyntax(patternSyntax)); | 
| 4856 |  | 
| 4857 |     newRegExp.setMinimal(isMinimal); | 
| 4858 |     regExp = newRegExp; | 
| 4859 |     return in; | 
| 4860 | } | 
| 4861 | #endif // QT_NO_DATASTREAM | 
| 4862 |  | 
| 4863 | #ifndef QT_NO_DEBUG_STREAM | 
| 4864 | QDebug operator<<(QDebug dbg, const QRegExp &r) | 
| 4865 | { | 
| 4866 |     QDebugStateSaver saver(dbg); | 
| 4867 |     dbg.nospace() << "QRegExp(patternSyntax="  << r.patternSyntax() | 
| 4868 |                   << ", pattern='" << r.pattern() << "')" ; | 
| 4869 |     return dbg; | 
| 4870 | } | 
| 4871 | #endif | 
| 4872 |  | 
| 4873 | QT_END_NAMESPACE | 
| 4874 |  |