| 1 | /* GRegex -- regular expression API wrapper around PCRE. |
| 2 | * |
| 3 | * Copyright (C) 1999, 2000 Scott Wimer |
| 4 | * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> |
| 5 | * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public License |
| 18 | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
| 19 | */ |
| 20 | |
| 21 | #ifndef __G_REGEX_H__ |
| 22 | #define __G_REGEX_H__ |
| 23 | |
| 24 | #if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) |
| 25 | #error "Only <glib.h> can be included directly." |
| 26 | #endif |
| 27 | |
| 28 | #include <glib/gerror.h> |
| 29 | #include <glib/gstring.h> |
| 30 | |
| 31 | G_BEGIN_DECLS |
| 32 | |
| 33 | /** |
| 34 | * GRegexError: |
| 35 | * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed. |
| 36 | * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed. |
| 37 | * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement |
| 38 | * string. |
| 39 | * @G_REGEX_ERROR_MATCH: The match process failed. |
| 40 | * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine. |
| 41 | * Since 2.16 |
| 42 | * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since 2.16 |
| 43 | * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since 2.16 |
| 44 | * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\". |
| 45 | * Since 2.16 |
| 46 | * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}" |
| 47 | * quantifier. Since 2.16 |
| 48 | * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier. |
| 49 | * Since 2.16 |
| 50 | * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for |
| 51 | * character class. Since 2.16 |
| 52 | * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence |
| 53 | * in character class. Since 2.16 |
| 54 | * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class. |
| 55 | * Since 2.16 |
| 56 | * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since 2.16 |
| 57 | * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?", |
| 58 | * "(?<" or "(?P". Since 2.16 |
| 59 | * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are |
| 60 | * supported only within a class. Since 2.16 |
| 61 | * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")" |
| 62 | * without opening "(". Since 2.16 |
| 63 | * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent |
| 64 | * subpattern. Since 2.16 |
| 65 | * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment. |
| 66 | * Since 2.16 |
| 67 | * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large. |
| 68 | * Since 2.16 |
| 69 | * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since 2.16 |
| 70 | * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not |
| 71 | * fixed length. Since 2.16 |
| 72 | * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(". |
| 73 | * Since 2.16 |
| 74 | * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains |
| 75 | * more than two branches. Since 2.16 |
| 76 | * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(". |
| 77 | * Since 2.16 |
| 78 | * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name. |
| 79 | * Since 2.16 |
| 80 | * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating |
| 81 | * elements are not supported. Since 2.16 |
| 82 | * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence |
| 83 | * is too large. Since 2.16 |
| 84 | * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since 2.16 |
| 85 | * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: \\C not allowed in |
| 86 | * lookbehind assertion. Since 2.16 |
| 87 | * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely. |
| 88 | * Since 2.16 |
| 89 | * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator |
| 90 | * in subpattern name. Since 2.16 |
| 91 | * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have |
| 92 | * the same name. Since 2.16 |
| 93 | * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence. |
| 94 | * Since 2.16 |
| 95 | * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or |
| 96 | * "\\p". Since 2.16 |
| 97 | * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long |
| 98 | * (maximum 32 characters). Since 2.16 |
| 99 | * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum |
| 100 | * 10,000). Since 2.16 |
| 101 | * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377". |
| 102 | * Since 2.16 |
| 103 | * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more |
| 104 | * than one branch. Since 2.16 |
| 105 | * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed. |
| 106 | * This error is never raised. Since: 2.16 Deprecated: 2.34 |
| 107 | * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options. |
| 108 | * Since 2.16 |
| 109 | * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced, |
| 110 | * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16 |
| 111 | * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34 |
| 112 | * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracking |
| 113 | * control verb used does not allow an argument. Since: 2.34 |
| 114 | * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracking |
| 115 | * control verb. Since: 2.34 |
| 116 | * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34 |
| 117 | * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34 |
| 118 | * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since 2.34 |
| 119 | * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode, |
| 120 | * "[" is an invalid data character. Since: 2.34 |
| 121 | * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the |
| 122 | * same number are not allowed. Since: 2.34 |
| 123 | * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control |
| 124 | * verb requires an argument. Since: 2.34 |
| 125 | * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII |
| 126 | * character. Since: 2.34 |
| 127 | * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or |
| 128 | * quoted name. Since: 2.34 |
| 129 | * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34 |
| 130 | * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34 |
| 131 | * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)", |
| 132 | * "(*SKIP)", or "(*THEN)". Since: 2.34 |
| 133 | * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is |
| 134 | * too large. Since: 2.34 |
| 135 | * |
| 136 | * Error codes returned by regular expressions functions. |
| 137 | * |
| 138 | * Since: 2.14 |
| 139 | */ |
/* Error codes used with the G_REGEX_ERROR domain.
 *
 * The first five codes are GRegex's own and take the implicit values
 * 0..4.  Every code from 101 upwards is the matching PCRE error number
 * plus 100, so the numbering has gaps.  Each of those values is written
 * out explicitly and must stay exactly as it is.
 */
typedef enum
{
  G_REGEX_ERROR_COMPILE,
  G_REGEX_ERROR_OPTIMIZE,
  G_REGEX_ERROR_REPLACE,
  G_REGEX_ERROR_MATCH,
  G_REGEX_ERROR_INTERNAL,

  /* These are the error codes from PCRE + 100 */
  G_REGEX_ERROR_STRAY_BACKSLASH = 101,
  G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
  G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
  G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
  G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
  G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
  G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
  G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
  G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
  G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
  G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
  G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
  G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
  G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
  G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
  G_REGEX_ERROR_MEMORY_ERROR = 121,
  G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
  G_REGEX_ERROR_MALFORMED_CONDITION = 126,
  G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
  G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
  G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
  G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
  G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
  G_REGEX_ERROR_INVALID_CONDITION = 135,
  G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
  G_REGEX_ERROR_INFINITE_LOOP = 140,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
  G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
  G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
  G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
  G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
  G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
  G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
  G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
  /* The misspelling "REPETION" is the identifier's actual name. */
  G_REGEX_ERROR_DEFINE_REPETION = 155,
  G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
  G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
  G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
  G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
  G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
  G_REGEX_ERROR_MISSING_DIGIT = 163,
  G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
  G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
  G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
  G_REGEX_ERROR_MISSING_NAME = 169,
  G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
  G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
  G_REGEX_ERROR_NAME_TOO_LONG = 175,
  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
} GRegexError;
| 202 | |
| 203 | /** |
| 204 | * G_REGEX_ERROR: |
| 205 | * |
| 206 | * Error domain for regular expressions. Errors in this domain will be |
| 207 | * from the #GRegexError enumeration. See #GError for information on |
| 208 | * error domains. |
| 209 | * |
| 210 | * Since: 2.14 |
| 211 | */ |
| 212 | #define G_REGEX_ERROR g_regex_error_quark () |
| 213 | |
| 214 | GLIB_AVAILABLE_IN_ALL |
| 215 | GQuark g_regex_error_quark (void); |
| 216 | |
| 217 | /** |
| 218 | * GRegexCompileFlags: |
| 219 | * @G_REGEX_CASELESS: Letters in the pattern match both upper- and |
| 220 | * lowercase letters. This option can be changed within a pattern |
| 221 | * by a "(?i)" option setting. |
| 222 | * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting |
| 223 | * of a single line of characters (even if it actually contains |
| 224 | * newlines). The "start of line" metacharacter ("^") matches only |
| 225 | * at the start of the string, while the "end of line" metacharacter |
| 226 | * ("$") matches only at the end of the string, or before a terminating |
| 227 | * newline (unless %G_REGEX_DOLLAR_ENDONLY is set). When |
| 228 | * %G_REGEX_MULTILINE is set, the "start of line" and "end of line" |
| 229 | * constructs match immediately following or immediately before any |
| 230 | * newline in the string, respectively, as well as at the very start |
| 231 | * and end. This can be changed within a pattern by a "(?m)" option |
| 232 | * setting. |
| 233 | * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all |
| 234 | * characters, including newlines. Without it, newlines are excluded. |
| 235 | * This option can be changed within a pattern by a "(?s)" option setting. |
| 236 | * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are |
| 237 | * totally ignored except when escaped or inside a character class. |
| 238 | * Whitespace does not include the VT character (code 11). In addition, |
| 239 | * characters between an unescaped "#" outside a character class and |
| 240 | * the next newline character, inclusive, are also ignored. This can |
| 241 | * be changed within a pattern by a "(?x)" option setting. |
| 242 | * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is, |
| 243 | * it is constrained to match only at the first matching point in the |
| 244 | * string that is being searched. This effect can also be achieved by |
| 245 | * appropriate constructs in the pattern itself such as the "^" |
| 246 | * metacharacter. |
| 247 | * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern |
| 248 | * matches only at the end of the string. Without this option, a |
| 249 | * dollar also matches immediately before the final character if |
| 250 | * it is a newline (but not before any other newlines). This option |
| 251 | * is ignored if %G_REGEX_MULTILINE is set. |
| 252 | * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that |
| 253 | * they are not greedy by default, but become greedy if followed by "?". |
| 254 | * It can also be set by a "(?U)" option setting within the pattern. |
| 255 | * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this |
| 256 | * flag they are considered as a raw sequence of bytes. |
| 257 | * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing |
| 258 | * parentheses in the pattern. Any opening parenthesis that is not |
| 259 | * followed by "?" behaves as if it were followed by "?:" but named |
| 260 | * parentheses can still be used for capturing (and they acquire numbers |
| 261 | * in the usual way). |
| 262 | * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will |
| 263 | * be used many times, then it may be worth the effort to optimize it |
| 264 | * to improve the speed of matches. |
| 265 | * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the |
| 266 | * first newline. Since: 2.34 |
| 267 | * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not |
| 268 | * be unique. This can be helpful for certain types of pattern when it |
| 269 | * is known that only one instance of the named subpattern can ever be |
| 270 | * matched. |
| 271 | * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is |
| 272 | * recognized. If this option is set, the only recognized newline character |
| 273 | * is '\r'. |
| 274 | * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is |
| 275 | * recognized. If this option is set, the only recognized newline character |
| 276 | * is '\n'. |
| 277 | * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is |
| 278 | * recognized. If this option is set, the only recognized newline character |
| 279 | * sequence is '\r\n'. |
| 280 | * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence |
| 281 | * is recognized. If this option is set, the only recognized newline character |
| 282 | * sequences are '\r', '\n', and '\r\n'. Since: 2.34 |
| 283 | * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence |
| 284 | * is recognised. If this option is set, then "\R" only recognizes the newline |
| 285 | * characters '\r', '\n' and '\r\n'. Since: 2.34 |
| 286 | * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with |
| 287 | * JavaScript rather than PCRE. Since: 2.34 |
| 288 | * |
| 289 | * Flags specifying compile-time options. |
| 290 | * |
| 291 | * Since: 2.14 |
| 292 | */ |
/* Whenever a flag is added here, G_REGEX_COMPILE_MASK in gregex.c
 * has to be updated as well.
 */
typedef enum
{
  G_REGEX_CASELESS          = (1 << 0),
  G_REGEX_MULTILINE         = (1 << 1),
  G_REGEX_DOTALL            = (1 << 2),
  G_REGEX_EXTENDED          = (1 << 3),
  G_REGEX_ANCHORED          = (1 << 4),
  G_REGEX_DOLLAR_ENDONLY    = (1 << 5),
  G_REGEX_UNGREEDY          = (1 << 9),
  G_REGEX_RAW               = (1 << 11),
  G_REGEX_NO_AUTO_CAPTURE   = (1 << 12),
  G_REGEX_OPTIMIZE          = (1 << 13),
  G_REGEX_FIRSTLINE         = (1 << 18),
  G_REGEX_DUPNAMES          = (1 << 19),

  /* Newline selection: CR and LF are single bits; CRLF is both of them,
   * and ANYCRLF is the CR bit together with bit 22. */
  G_REGEX_NEWLINE_CR        = (1 << 20),
  G_REGEX_NEWLINE_LF        = (1 << 21),
  G_REGEX_NEWLINE_CRLF      = (G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF),
  G_REGEX_NEWLINE_ANYCRLF   = (G_REGEX_NEWLINE_CR | (1 << 22)),

  G_REGEX_BSR_ANYCRLF       = (1 << 23),
  G_REGEX_JAVASCRIPT_COMPAT = (1 << 25)
} GRegexCompileFlags;
| 317 | |
| 318 | /** |
| 319 | * GRegexMatchFlags: |
| 320 | * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is, |
| 321 | * it is constrained to match only at the first matching point in the |
| 322 | * string that is being searched. This effect can also be achieved by |
| 323 | * appropriate constructs in the pattern itself such as the "^" |
| 324 | * metacharacter. |
| 325 | * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is |
| 326 | * not the beginning of a line, so the circumflex metacharacter should |
| 327 | * not match before it. Setting this without %G_REGEX_MULTILINE (at |
| 328 | * compile time) causes circumflex never to match. This option affects |
| 329 | * only the behaviour of the circumflex metacharacter, it does not |
| 330 | * affect "\A". |
| 331 | * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is |
| 332 | * not the end of a line, so the dollar metacharacter should not match |
| 333 | * it nor (except in multiline mode) a newline immediately before it. |
| 334 | * Setting this without %G_REGEX_MULTILINE (at compile time) causes |
| 335 | * dollar never to match. This option affects only the behaviour of |
| 336 | * the dollar metacharacter, it does not affect "\Z" or "\z". |
| 337 | * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid |
| 338 | * match if this option is set. If there are alternatives in the pattern, |
| 339 | * they are tried. If all the alternatives match the empty string, the |
| 340 | * entire match fails. For example, if the pattern "a?b?" is applied to |
| 341 | * a string not beginning with "a" or "b", it matches the empty string |
| 342 | * at the start of the string. With this flag set, this match is not |
| 343 | * valid, so GRegex searches further into the string for occurrences |
| 344 | * of "a" or "b". |
| 345 | * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more |
| 346 | * documentation on partial matching see g_match_info_is_partial_match(). |
| 347 | * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when |
| 348 | * creating a new #GRegex, setting the '\r' character as line terminator. |
| 349 | * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when |
| 350 | * creating a new #GRegex, setting the '\n' character as line terminator. |
| 351 | * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when |
| 352 | * creating a new #GRegex, setting the '\r\n' characters sequence as line terminator. |
| 353 | * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when |
| 354 | * creating a new #GRegex, any Unicode newline sequence |
| 355 | * is recognised as a newline. These are '\r', '\n' and '\r\n', and the |
| 356 | * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 357 | * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and |
| 358 | * U+2029 PARAGRAPH SEPARATOR. |
| 359 | * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when |
| 360 | * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence |
| 361 | * is recognized as a newline. Since: 2.34 |
| 362 | * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when |
| 363 | * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences |
| 364 | * are recognized as a newline by "\R". Since: 2.34 |
| 365 | * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when |
| 366 | * creating a new #GRegex; any Unicode newline character or character sequence |
| 367 | * are recognized as a newline by "\R". These are '\r', '\n' and '\r\n', and the |
| 368 | * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 369 | * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and |
| 370 | * U+2029 PARAGRAPH SEPARATOR. Since: 2.34 |
| 371 | * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for %G_REGEX_MATCH_PARTIAL. Since: 2.34 |
| 372 | * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast to |
| 373 | * %G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match |
| 374 | * is found, without continuing to search for a possible complete match. See |
| 375 | * g_match_info_is_partial_match() for more information. Since: 2.34 |
| 376 | * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like %G_REGEX_MATCH_NOTEMPTY, but only applied to |
| 377 | * the start of the matched string. For anchored |
| 378 | * patterns this can only happen for pattern containing "\K". Since: 2.34 |
| 379 | * |
| 380 | * Flags specifying match-time options. |
| 381 | * |
| 382 | * Since: 2.14 |
| 383 | */ |
/* Whenever a flag is added here, G_REGEX_MATCH_MASK in gregex.c
 * has to be updated as well.
 */
typedef enum
{
  G_REGEX_MATCH_ANCHORED         = (1 << 4),
  G_REGEX_MATCH_NOTBOL           = (1 << 7),
  G_REGEX_MATCH_NOTEOL           = (1 << 8),
  G_REGEX_MATCH_NOTEMPTY         = (1 << 10),
  G_REGEX_MATCH_PARTIAL          = (1 << 15),

  /* Newline selection: CRLF is the CR and LF bits together, and ANYCRLF
   * is the CR bit together with the ANY bit. */
  G_REGEX_MATCH_NEWLINE_CR       = (1 << 20),
  G_REGEX_MATCH_NEWLINE_LF       = (1 << 21),
  G_REGEX_MATCH_NEWLINE_CRLF     = (G_REGEX_MATCH_NEWLINE_CR
                                    | G_REGEX_MATCH_NEWLINE_LF),
  G_REGEX_MATCH_NEWLINE_ANY      = (1 << 22),
  G_REGEX_MATCH_NEWLINE_ANYCRLF  = (G_REGEX_MATCH_NEWLINE_CR
                                    | G_REGEX_MATCH_NEWLINE_ANY),

  G_REGEX_MATCH_BSR_ANYCRLF      = (1 << 23),
  G_REGEX_MATCH_BSR_ANY          = (1 << 24),

  /* PARTIAL_SOFT carries the same value as G_REGEX_MATCH_PARTIAL. */
  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
  G_REGEX_MATCH_PARTIAL_HARD     = (1 << 27),
  G_REGEX_MATCH_NOTEMPTY_ATSTART = (1 << 28)
} GRegexMatchFlags;
| 404 | |
| 405 | /** |
| 406 | * GRegex: |
| 407 | * |
| 408 | * A GRegex is the "compiled" form of a regular expression pattern. |
| 409 | * This structure is opaque and its fields cannot be accessed directly. |
| 410 | * |
| 411 | * Since: 2.14 |
| 412 | */ |
typedef struct _GRegex GRegex; /* struct _GRegex is only forward-declared in this header */
| 414 | |
| 415 | |
| 416 | /** |
| 417 | * GMatchInfo: |
| 418 | * |
| 419 | * A GMatchInfo is an opaque struct used to return information about |
| 420 | * matches. |
| 421 | */ |
typedef struct _GMatchInfo GMatchInfo; /* struct _GMatchInfo is only forward-declared in this header */
| 423 | |
| 424 | /** |
| 425 | * GRegexEvalCallback: |
| 426 | * @match_info: the #GMatchInfo generated by the match. |
| 427 | * Use g_match_info_get_regex() and g_match_info_get_string() if you |
| 428 | * need the #GRegex or the matched string. |
| 429 | * @result: a #GString containing the new string |
| 430 | * @user_data: user data passed to g_regex_replace_eval() |
| 431 | * |
| 432 | * Specifies the type of the function passed to g_regex_replace_eval(). |
| 433 | * It is called for each occurrence of the pattern in the string passed |
| 434 | * to g_regex_replace_eval(), and it should append the replacement to |
| 435 | * @result. |
| 436 | * |
| 437 | * Returns: %FALSE to continue the replacement process, %TRUE to stop it |
| 438 | * |
| 439 | * Since: 2.14 |
| 440 | */ |
| 441 | typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info, |
| 442 | GString *result, |
| 443 | gpointer user_data); |
| 444 | |
| 445 | |
| 446 | GLIB_AVAILABLE_IN_ALL |
| 447 | GRegex *g_regex_new (const gchar *pattern, |
| 448 | GRegexCompileFlags compile_options, |
| 449 | GRegexMatchFlags match_options, |
| 450 | GError **error); |
| 451 | GLIB_AVAILABLE_IN_ALL |
| 452 | GRegex *g_regex_ref (GRegex *regex); |
| 453 | GLIB_AVAILABLE_IN_ALL |
| 454 | void g_regex_unref (GRegex *regex); |
| 455 | GLIB_AVAILABLE_IN_ALL |
| 456 | const gchar *g_regex_get_pattern (const GRegex *regex); |
| 457 | GLIB_AVAILABLE_IN_ALL |
| 458 | gint g_regex_get_max_backref (const GRegex *regex); |
| 459 | GLIB_AVAILABLE_IN_ALL |
| 460 | gint g_regex_get_capture_count (const GRegex *regex); |
| 461 | GLIB_AVAILABLE_IN_ALL |
| 462 | gboolean g_regex_get_has_cr_or_lf (const GRegex *regex); |
| 463 | GLIB_AVAILABLE_IN_2_38 |
| 464 | gint g_regex_get_max_lookbehind (const GRegex *regex); |
| 465 | GLIB_AVAILABLE_IN_ALL |
| 466 | gint g_regex_get_string_number (const GRegex *regex, |
| 467 | const gchar *name); |
| 468 | GLIB_AVAILABLE_IN_ALL |
| 469 | gchar *g_regex_escape_string (const gchar *string, |
| 470 | gint length); |
| 471 | GLIB_AVAILABLE_IN_ALL |
| 472 | gchar *g_regex_escape_nul (const gchar *string, |
| 473 | gint length); |
| 474 | |
| 475 | GLIB_AVAILABLE_IN_ALL |
| 476 | GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex); |
| 477 | GLIB_AVAILABLE_IN_ALL |
| 478 | GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex); |
| 479 | |
| 480 | /* Matching. */ |
| 481 | GLIB_AVAILABLE_IN_ALL |
| 482 | gboolean g_regex_match_simple (const gchar *pattern, |
| 483 | const gchar *string, |
| 484 | GRegexCompileFlags compile_options, |
| 485 | GRegexMatchFlags match_options); |
| 486 | GLIB_AVAILABLE_IN_ALL |
| 487 | gboolean g_regex_match (const GRegex *regex, |
| 488 | const gchar *string, |
| 489 | GRegexMatchFlags match_options, |
| 490 | GMatchInfo **match_info); |
| 491 | GLIB_AVAILABLE_IN_ALL |
| 492 | gboolean g_regex_match_full (const GRegex *regex, |
| 493 | const gchar *string, |
| 494 | gssize string_len, |
| 495 | gint start_position, |
| 496 | GRegexMatchFlags match_options, |
| 497 | GMatchInfo **match_info, |
| 498 | GError **error); |
| 499 | GLIB_AVAILABLE_IN_ALL |
| 500 | gboolean g_regex_match_all (const GRegex *regex, |
| 501 | const gchar *string, |
| 502 | GRegexMatchFlags match_options, |
| 503 | GMatchInfo **match_info); |
| 504 | GLIB_AVAILABLE_IN_ALL |
| 505 | gboolean g_regex_match_all_full (const GRegex *regex, |
| 506 | const gchar *string, |
| 507 | gssize string_len, |
| 508 | gint start_position, |
| 509 | GRegexMatchFlags match_options, |
| 510 | GMatchInfo **match_info, |
| 511 | GError **error); |
| 512 | |
| 513 | /* String splitting. */ |
| 514 | GLIB_AVAILABLE_IN_ALL |
| 515 | gchar **g_regex_split_simple (const gchar *pattern, |
| 516 | const gchar *string, |
| 517 | GRegexCompileFlags compile_options, |
| 518 | GRegexMatchFlags match_options); |
| 519 | GLIB_AVAILABLE_IN_ALL |
| 520 | gchar **g_regex_split (const GRegex *regex, |
| 521 | const gchar *string, |
| 522 | GRegexMatchFlags match_options); |
| 523 | GLIB_AVAILABLE_IN_ALL |
| 524 | gchar **g_regex_split_full (const GRegex *regex, |
| 525 | const gchar *string, |
| 526 | gssize string_len, |
| 527 | gint start_position, |
| 528 | GRegexMatchFlags match_options, |
| 529 | gint max_tokens, |
| 530 | GError **error); |
| 531 | |
| 532 | /* String replacement. */ |
| 533 | GLIB_AVAILABLE_IN_ALL |
| 534 | gchar *g_regex_replace (const GRegex *regex, |
| 535 | const gchar *string, |
| 536 | gssize string_len, |
| 537 | gint start_position, |
| 538 | const gchar *replacement, |
| 539 | GRegexMatchFlags match_options, |
| 540 | GError **error); |
| 541 | GLIB_AVAILABLE_IN_ALL |
| 542 | gchar *g_regex_replace_literal (const GRegex *regex, |
| 543 | const gchar *string, |
| 544 | gssize string_len, |
| 545 | gint start_position, |
| 546 | const gchar *replacement, |
| 547 | GRegexMatchFlags match_options, |
| 548 | GError **error); |
| 549 | GLIB_AVAILABLE_IN_ALL |
| 550 | gchar *g_regex_replace_eval (const GRegex *regex, |
| 551 | const gchar *string, |
| 552 | gssize string_len, |
| 553 | gint start_position, |
| 554 | GRegexMatchFlags match_options, |
| 555 | GRegexEvalCallback eval, |
| 556 | gpointer user_data, |
| 557 | GError **error); |
| 558 | GLIB_AVAILABLE_IN_ALL |
| 559 | gboolean g_regex_check_replacement (const gchar *replacement, |
| 560 | gboolean *has_references, |
| 561 | GError **error); |
| 562 | |
| 563 | /* Match info */ |
| 564 | GLIB_AVAILABLE_IN_ALL |
| 565 | GRegex *g_match_info_get_regex (const GMatchInfo *match_info); |
| 566 | GLIB_AVAILABLE_IN_ALL |
| 567 | const gchar *g_match_info_get_string (const GMatchInfo *match_info); |
| 568 | |
| 569 | GLIB_AVAILABLE_IN_ALL |
| 570 | GMatchInfo *g_match_info_ref (GMatchInfo *match_info); |
| 571 | GLIB_AVAILABLE_IN_ALL |
| 572 | void g_match_info_unref (GMatchInfo *match_info); |
| 573 | GLIB_AVAILABLE_IN_ALL |
| 574 | void g_match_info_free (GMatchInfo *match_info); |
| 575 | GLIB_AVAILABLE_IN_ALL |
| 576 | gboolean g_match_info_next (GMatchInfo *match_info, |
| 577 | GError **error); |
| 578 | GLIB_AVAILABLE_IN_ALL |
| 579 | gboolean g_match_info_matches (const GMatchInfo *match_info); |
| 580 | GLIB_AVAILABLE_IN_ALL |
| 581 | gint g_match_info_get_match_count (const GMatchInfo *match_info); |
| 582 | GLIB_AVAILABLE_IN_ALL |
| 583 | gboolean g_match_info_is_partial_match (const GMatchInfo *match_info); |
| 584 | GLIB_AVAILABLE_IN_ALL |
| 585 | gchar *g_match_info_expand_references(const GMatchInfo *match_info, |
| 586 | const gchar *string_to_expand, |
| 587 | GError **error); |
| 588 | GLIB_AVAILABLE_IN_ALL |
| 589 | gchar *g_match_info_fetch (const GMatchInfo *match_info, |
| 590 | gint match_num); |
| 591 | GLIB_AVAILABLE_IN_ALL |
| 592 | gboolean g_match_info_fetch_pos (const GMatchInfo *match_info, |
| 593 | gint match_num, |
| 594 | gint *start_pos, |
| 595 | gint *end_pos); |
| 596 | GLIB_AVAILABLE_IN_ALL |
| 597 | gchar *g_match_info_fetch_named (const GMatchInfo *match_info, |
| 598 | const gchar *name); |
| 599 | GLIB_AVAILABLE_IN_ALL |
| 600 | gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info, |
| 601 | const gchar *name, |
| 602 | gint *start_pos, |
| 603 | gint *end_pos); |
| 604 | GLIB_AVAILABLE_IN_ALL |
| 605 | gchar **g_match_info_fetch_all (const GMatchInfo *match_info); |
| 606 | |
| 607 | G_END_DECLS |
| 608 | |
| 609 | #endif /* __G_REGEX_H__ */ |
| 610 | |