gregex.c source code [gtk/subprojects/glib/glib/gregex.c]

1	/* GRegex -- regular expression API wrapper around PCRE.
2	*
3	* Copyright (C) 1999, 2000 Scott Wimer
4	* Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
5	* Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Lesser General Public
9	* License as published by the Free Software Foundation; either
10	* version 2.1 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Lesser General Public License for more details.
16	*
17	* You should have received a copy of the GNU Lesser General Public License
18	* along with this library; if not, see <http://www.gnu.org/licenses/>.
19	*/
20
21	#include "config.h"
22
23	#include <string.h>
24
25	#ifdef USE_SYSTEM_PCRE
26	#include <pcre.h>
27	#else
28	#include "pcre/pcre.h"
29	#endif
30
31	#include "gtypes.h"
32	#include "gregex.h"
33	#include "glibintl.h"
34	#include "glist.h"
35	#include "gmessages.h"
36	#include "gstrfuncs.h"
37	#include "gatomic.h"
38	#include "gthread.h"
39
40	/**
41	* SECTION:gregex
42	* @title: Perl-compatible regular expressions
43	* @short_description: matches strings against regular expressions
44	* @see_also: [Regular expression syntax][glib-regex-syntax]
45	*
46	* The g_regex_*() functions implement regular
47	* expression pattern matching using syntax and semantics similar to
48	* Perl regular expression.
49	*
50	* Some functions accept a @start_position argument, setting it differs
51	* from just passing over a shortened string and setting #G_REGEX_MATCH_NOTBOL
52	* in the case of a pattern that begins with any kind of lookbehind assertion.
53	* For example, consider the pattern "\Biss\B" which finds occurrences of "iss"
54	* in the middle of words. ("\B" matches only if the current position in the
55	* subject is not a word boundary.) When applied to the string "Mississipi"
56	* from the fourth byte, namely "issipi", it does not match, because "\B" is
57	* always false at the start of the subject, which is deemed to be a word
58	* boundary. However, if the entire string is passed, but with
59	* @start_position set to 4, it finds the second occurrence of "iss" because
60	* it is able to look behind the starting point to discover that it is
61	* preceded by a letter.
62	*
63	* Note that, unless you set the #G_REGEX_RAW flag, all the strings passed
64	* to these functions must be encoded in UTF-8. The lengths and the positions
65	* inside the strings are in bytes and not in characters, so, for instance,
66	* "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a
67	* single character. If you set #G_REGEX_RAW the strings can be non-valid
68	* UTF-8 strings and a byte is treated as a character, so "\xc3\xa0" is two
69	* bytes and two characters long.
70	*
71	* When matching a pattern, "\n" matches only against a "\n" character in
72	* the string, and "\r" matches only a "\r" character. To match any newline
73	* sequence use "\R". This particular group matches either the two-character
74	* sequence CR + LF ("\r\n"), or one of the single characters LF (linefeed,
75	* U+000A, "\n"), VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"),
76	* CR (carriage return, U+000D, "\r"), NEL (next line, U+0085), LS (line
77	* separator, U+2028), or PS (paragraph separator, U+2029).
78	*
79	* The behaviour of the dot, circumflex, and dollar metacharacters are
80	* affected by newline characters, the default is to recognize any newline
81	* character (the same characters recognized by "\R"). This can be changed
82	* with #G_REGEX_NEWLINE_CR, #G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF
83	* compile options, and with #G_REGEX_MATCH_NEWLINE_ANY,
84	* #G_REGEX_MATCH_NEWLINE_CR, #G_REGEX_MATCH_NEWLINE_LF and
85	* #G_REGEX_MATCH_NEWLINE_CRLF match options. These settings are also
86	* relevant when compiling a pattern if #G_REGEX_EXTENDED is set, and an
87	* unescaped "#" outside a character class is encountered. This indicates
88	* a comment that lasts until after the next newline.
89	*
90	* When setting the %G_REGEX_JAVASCRIPT_COMPAT flag, pattern syntax and pattern
91	* matching is changed to be compatible with the way that regular expressions
92	* work in JavaScript. More precisely, a lonely ']' character in the pattern
93	* is a syntax error; the '\x' escape only allows 0 to 2 hexadecimal digits, and
94	* you must use the '\u' escape sequence with 4 hex digits to specify a unicode
95	* codepoint instead of '\x' or '\x{....}'. If '\x' or '\u' are not followed by
96	* the specified number of hex digits, they match 'x' and 'u' literally; also
97	* '\U' always matches 'U' instead of being an error in the pattern. Finally,
98	* pattern matching is modified so that back references to an unset subpattern
99	* group produces a match with the empty string instead of an error. See
100	* pcreapi(3) for more information.
101	*
102	* Creating and manipulating the same #GRegex structure from different
103	* threads is not a problem as #GRegex does not modify its internal
104	* state between creation and destruction, on the other hand #GMatchInfo
105	* is not threadsafe.
106	*
107	* The regular expressions low-level functionalities are obtained through
108	* the excellent
109	* [PCRE](http://www.pcre.org/)
110	* library written by Philip Hazel.
111	*/
112
/* Mask of all the possible values for GRegexCompileFlags.
 * It is the bitwise OR of every compile flag listed below. */
#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS          | \
                              G_REGEX_MULTILINE         | \
                              G_REGEX_DOTALL            | \
                              G_REGEX_EXTENDED          | \
                              G_REGEX_ANCHORED          | \
                              G_REGEX_DOLLAR_ENDONLY    | \
                              G_REGEX_UNGREEDY          | \
                              G_REGEX_RAW               | \
                              G_REGEX_NO_AUTO_CAPTURE   | \
                              G_REGEX_OPTIMIZE          | \
                              G_REGEX_FIRSTLINE         | \
                              G_REGEX_DUPNAMES          | \
                              G_REGEX_NEWLINE_CR        | \
                              G_REGEX_NEWLINE_LF        | \
                              G_REGEX_NEWLINE_CRLF      | \
                              G_REGEX_NEWLINE_ANYCRLF   | \
                              G_REGEX_BSR_ANYCRLF       | \
                              G_REGEX_JAVASCRIPT_COMPAT)
132
/* Masks of the GRegexCompileFlags values that are (PCRE_MASK) and are not
 * (NONPCRE_MASK) passed through to PCRE.  The PCRE mask is every compile
 * flag except the ones in the non-PCRE mask. */
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW              | \
                                      G_REGEX_OPTIMIZE)
137
/* Mask of all the possible values for GRegexMatchFlags.
 * It is the bitwise OR of every match flag listed below. */
#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
                            G_REGEX_MATCH_NOTBOL           | \
                            G_REGEX_MATCH_NOTEOL           | \
                            G_REGEX_MATCH_NOTEMPTY         | \
                            G_REGEX_MATCH_PARTIAL          | \
                            G_REGEX_MATCH_NEWLINE_CR       | \
                            G_REGEX_MATCH_NEWLINE_LF       | \
                            G_REGEX_MATCH_NEWLINE_CRLF     | \
                            G_REGEX_MATCH_NEWLINE_ANY      | \
                            G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
                            G_REGEX_MATCH_BSR_ANYCRLF      | \
                            G_REGEX_MATCH_BSR_ANY          | \
                            G_REGEX_MATCH_PARTIAL_SOFT     | \
                            G_REGEX_MATCH_PARTIAL_HARD     | \
                            G_REGEX_MATCH_NOTEMPTY_ATSTART)
154
155	/ we rely on these flags having the same values /
156	G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
157	G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
158	G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
159	G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
160	G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
161	G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
162	G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
163	G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
164	G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE);
165	G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
166	G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
167	G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
168	G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
169	G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
170	G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
171	G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
172
173	G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
174	G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
175	G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
176	G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
177	G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
178	G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
179	G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
180	G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
181	G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
182	G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
183	G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
184	G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
185	G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
186	G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
187	G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
188
189	/ These PCRE flags are unused or not exposed publicly in GRegexFlags, so*
190	* it should be ok to reuse them for different things.
191	*/
192	G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK);
193	G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8);
194
/* Step one character forwards (NEXT_CHAR) or backwards (PREV_CHAR) from @s.
 * If the regex was compiled with G_REGEX_RAW a character is a single byte,
 * so just use +/- 1; otherwise the string is UTF-8 and the g_utf8_
 * functions are used. */
#define NEXT_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
                                ((s) + 1) : \
                                g_utf8_next_char (s))
#define PREV_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
                                ((s) - 1) : \
                                g_utf8_prev_char (s))
203
204	struct _GMatchInfo
205	{
206	gint ref_count; / the ref count (atomic) /
207	GRegex regex; /* the regex /
208	GRegexMatchFlags match_opts; / options used at match time on the regex /
209	gint matches; / number of matching sub patterns /
210	gint pos; / position in the string where last match left off /
211	gint n_offsets; / number of offsets /
212	gint offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc /
213	gint workspace; /* workspace for pcre_dfa_exec() /
214	gint n_workspace; / number of workspace elements /
215	const gchar string; /* string passed to the match function /
216	gssize string_len; / length of string, in bytes /
217	};
218
219	struct _GRegex
220	{
221	gint ref_count; / the ref count for the immutable part (atomic) /
222	gchar pattern; /* the pattern /
223	pcre pcre_re; /* compiled form of the pattern /
224	GRegexCompileFlags compile_opts; / options used at compile time on the pattern /
225	GRegexMatchFlags match_opts; / options used at match time on the regex /
226	pcre_extra extra; /* data stored when G_REGEX_OPTIMIZE is used /
227	};
228
/* TRUE if ret is an error code, FALSE otherwise.
 * Error codes are the values below PCRE_ERROR_NOMATCH, with the exception
 * of PCRE_ERROR_PARTIAL which reports a partial match rather than a failure. */
#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL)
231
232	typedef struct _InterpolationData InterpolationData;
233	static gboolean interpolation_list_needs_match (GList *list);
234	static gboolean interpolate_replacement (const GMatchInfo *match_info,
235	GString *result,
236	gpointer data);
237	static GList split_replacement (const* gchar *replacement,
238	GError **error);
239	static void free_interpolation_data (InterpolationData *data);
240
241
242	static const gchar *
243	match_error (gint errcode)
244	{
245	switch (errcode)
246	{
247	case PCRE_ERROR_NOMATCH:
248	/ not an error /
249	break;
250	case PCRE_ERROR_NULL:
251	/ NULL argument, this should not happen in GRegex /
252	g_warning ("A NULL argument was passed to PCRE");
253	break;
254	case PCRE_ERROR_BADOPTION:
255	return "bad options";
256	case PCRE_ERROR_BADMAGIC:
257	return _("corrupted object");
258	case PCRE_ERROR_UNKNOWN_OPCODE:
259	return N_("internal error or corrupted object");
260	case PCRE_ERROR_NOMEMORY:
261	return _("out of memory");
262	case PCRE_ERROR_NOSUBSTRING:
263	/ not used by pcre_exec() /
264	break;
265	case PCRE_ERROR_MATCHLIMIT:
266	return _("backtracking limit reached");
267	case PCRE_ERROR_CALLOUT:
268	/ callouts are not implemented /
269	break;
270	case PCRE_ERROR_BADUTF8:
271	case PCRE_ERROR_BADUTF8_OFFSET:
272	/ we do not check if strings are valid /
273	break;
274	case PCRE_ERROR_PARTIAL:
275	/ not an error /
276	break;
277	case PCRE_ERROR_BADPARTIAL:
278	return _("the pattern contains items not supported for partial matching");
279	case PCRE_ERROR_INTERNAL:
280	return _("internal error");
281	case PCRE_ERROR_BADCOUNT:
282	/ negative ovecsize, this should not happen in GRegex /
283	g_warning ("A negative ovecsize was passed to PCRE");
284	break;
285	case PCRE_ERROR_DFA_UITEM:
286	return _("the pattern contains items not supported for partial matching");
287	case PCRE_ERROR_DFA_UCOND:
288	return _("back references as conditions are not supported for partial matching");
289	case PCRE_ERROR_DFA_UMLIMIT:
290	/ the match_field field is not used in GRegex /
291	break;
292	case PCRE_ERROR_DFA_WSSIZE:
293	/ handled expanding the workspace /
294	break;
295	case PCRE_ERROR_DFA_RECURSE:
296	case PCRE_ERROR_RECURSIONLIMIT:
297	return _("recursion limit reached");
298	case PCRE_ERROR_BADNEWLINE:
299	return _("invalid combination of newline flags");
300	case PCRE_ERROR_BADOFFSET:
301	return _("bad offset");
302	case PCRE_ERROR_SHORTUTF8:
303	return _("short utf8");
304	case PCRE_ERROR_RECURSELOOP:
305	return _("recursion loop");
306	default:
307	break;
308	}
309	return _("unknown error");
310	}
311
312	static void
313	translate_compile_error (gint errcode, const* gchar **errmsg)
314	{
315	/ Compile errors are created adding 100 to the error code returned*
316	* by PCRE.
317	* If errcode is known we put the translatable error message in
318	* erromsg. If errcode is unknown we put the generic
319	* G_REGEX_ERROR_COMPILE error code in errcode and keep the
320	* untranslated error message returned by PCRE.
321	* Note that there can be more PCRE errors with the same GRegexError
322	* and that some PCRE errors are useless for us.
323	*/
324	*errcode += `100`;
325
326	switch (*errcode)
327	{
328	case G_REGEX_ERROR_STRAY_BACKSLASH:
329	*errmsg = _("\\ at end of pattern");
330	break;
331	case G_REGEX_ERROR_MISSING_CONTROL_CHAR:
332	*errmsg = _("\\c at end of pattern");
333	break;
334	case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE:
335	*errmsg = _("unrecognized character following \\");
336	break;
337	case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER:
338	*errmsg = _("numbers out of order in {} quantifier");
339	break;
340	case G_REGEX_ERROR_QUANTIFIER_TOO_BIG:
341	*errmsg = _("number too big in {} quantifier");
342	break;
343	case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS:
344	*errmsg = _("missing terminating ] for character class");
345	break;
346	case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS:
347	*errmsg = _("invalid escape sequence in character class");
348	break;
349	case G_REGEX_ERROR_RANGE_OUT_OF_ORDER:
350	*errmsg = _("range out of order in character class");
351	break;
352	case G_REGEX_ERROR_NOTHING_TO_REPEAT:
353	*errmsg = _("nothing to repeat");
354	break;
355	case `111`: / internal error: unexpected repeat /
356	*errcode = G_REGEX_ERROR_INTERNAL;
357	*errmsg = _("unexpected repeat");
358	break;
359	case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER:
360	*errmsg = _("unrecognized character after (? or (?-");
361	break;
362	case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS:
363	*errmsg = _("POSIX named classes are supported only within a class");
364	break;
365	case G_REGEX_ERROR_UNMATCHED_PARENTHESIS:
366	*errmsg = _("missing terminating )");
367	break;
368	case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE:
369	*errmsg = _("reference to non-existent subpattern");
370	break;
371	case G_REGEX_ERROR_UNTERMINATED_COMMENT:
372	*errmsg = _("missing ) after comment");
373	break;
374	case G_REGEX_ERROR_EXPRESSION_TOO_LARGE:
375	*errmsg = _("regular expression is too large");
376	break;
377	case G_REGEX_ERROR_MEMORY_ERROR:
378	*errmsg = _("failed to get memory");
379	break;
380	case `122`: / unmatched parentheses /
381	*errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
382	*errmsg = _(") without opening (");
383	break;
384	case `123`: / internal error: code overflow /
385	*errcode = G_REGEX_ERROR_INTERNAL;
386	*errmsg = _("code overflow");
387	break;
388	case `124`: / "unrecognized character after (?<\0 /
389	*errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
390	*errmsg = _("unrecognized character after (?<");
391	break;
392	case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND:
393	*errmsg = _("lookbehind assertion is not fixed length");
394	break;
395	case G_REGEX_ERROR_MALFORMED_CONDITION:
396	*errmsg = _("malformed number or name after (?(");
397	break;
398	case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES:
399	*errmsg = _("conditional group contains more than two branches");
400	break;
401	case G_REGEX_ERROR_ASSERTION_EXPECTED:
402	*errmsg = _("assertion expected after (?(");
403	break;
404	case `129`:
405	*errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
406	/ translators: '(?R' and '(?[+-]digits' are both meant as (groups of)*
407	* sequences here, '(?-54' would be an example for the second group.
408	*/
409	*errmsg = _("(?R or (?[+-]digits must be followed by )");
410	break;
411	case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME:
412	*errmsg = _("unknown POSIX class name");
413	break;
414	case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED:
415	*errmsg = _("POSIX collating elements are not supported");
416	break;
417	case G_REGEX_ERROR_HEX_CODE_TOO_LARGE:
418	*errmsg = _("character value in \\x{...} sequence is too large");
419	break;
420	case G_REGEX_ERROR_INVALID_CONDITION:
421	*errmsg = _("invalid condition (?(0)");
422	break;
423	case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND:
424	*errmsg = _("\\C not allowed in lookbehind assertion");
425	break;
426	case `137`: / PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 /
427	/ A number of Perl escapes are not handled by PCRE.*
428	* Therefore it explicitly raises ERR37.
429	*/
430	*errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
431	*errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported");
432	break;
433	case G_REGEX_ERROR_INFINITE_LOOP:
434	*errmsg = _("recursive call could loop indefinitely");
435	break;
436	case `141`: / unrecognized character after (?P\0 /
437	*errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
438	*errmsg = _("unrecognized character after (?P");
439	break;
440	case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR:
441	*errmsg = _("missing terminator in subpattern name");
442	break;
443	case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME:
444	*errmsg = _("two named subpatterns have the same name");
445	break;
446	case G_REGEX_ERROR_MALFORMED_PROPERTY:
447	*errmsg = _("malformed \\P or \\p sequence");
448	break;
449	case G_REGEX_ERROR_UNKNOWN_PROPERTY:
450	*errmsg = _("unknown property name after \\P or \\p");
451	break;
452	case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG:
453	*errmsg = _("subpattern name is too long (maximum 32 characters)");
454	break;
455	case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS:
456	*errmsg = _("too many named subpatterns (maximum 10,000)");
457	break;
458	case G_REGEX_ERROR_INVALID_OCTAL_VALUE:
459	*errmsg = _("octal value is greater than \\377");
460	break;
461	case `152`: / internal error: overran compiling workspace /
462	*errcode = G_REGEX_ERROR_INTERNAL;
463	*errmsg = _("overran compiling workspace");
464	break;
465	case `153`: / internal error: previously-checked referenced subpattern not found /
466	*errcode = G_REGEX_ERROR_INTERNAL;
467	*errmsg = _("previously-checked referenced subpattern not found");
468	break;
469	case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE:
470	*errmsg = _("DEFINE group contains more than one branch");
471	break;
472	case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS:
473	*errmsg = _("inconsistent NEWLINE options");
474	break;
475	case G_REGEX_ERROR_MISSING_BACK_REFERENCE:
476	*errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or "
477	"number, or by a plain number");
478	break;
479	case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE:
480	*errmsg = _("a numbered reference must not be zero");
481	break;
482	case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN:
483	errmsg = _("an argument is not allowed for (ACCEPT), (FAIL), or (COMMIT)");
484	break;
485	case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB:
486	errmsg = _("(VERB) not recognized");
487	break;
488	case G_REGEX_ERROR_NUMBER_TOO_BIG:
489	*errmsg = _("number is too big");
490	break;
491	case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME:
492	*errmsg = _("missing subpattern name after (?&");
493	break;
494	case G_REGEX_ERROR_MISSING_DIGIT:
495	*errmsg = _("digit expected after (?+");
496	break;
497	case G_REGEX_ERROR_INVALID_DATA_CHARACTER:
498	*errmsg = _("] is an invalid data character in JavaScript compatibility mode");
499	break;
500	case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
501	*errmsg = _("different names for subpatterns of the same number are not allowed");
502	break;
503	case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED:
504	errmsg = _("(MARK) must have an argument");
505	break;
506	case G_REGEX_ERROR_INVALID_CONTROL_CHAR:
507	*errmsg = _( "\\c must be followed by an ASCII character");
508	break;
509	case G_REGEX_ERROR_MISSING_NAME:
510	*errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name");
511	break;
512	case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS:
513	*errmsg = _("\\N is not supported in a class");
514	break;
515	case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES:
516	*errmsg = _("too many forward references");
517	break;
518	case G_REGEX_ERROR_NAME_TOO_LONG:
519	errmsg = _("name is too long in (MARK), (PRUNE), (SKIP), or (*THEN)");
520	break;
521	case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
522	*errmsg = _("character value in \\u.... sequence is too large");
523	break;
524
525	case `116`: / erroffset passed as NULL /
526	/ This should not happen as we never pass a NULL erroffset /
527	g_warning ("erroffset passed as NULL");
528	*errcode = G_REGEX_ERROR_COMPILE;
529	break;
530	case `117`: / unknown option bit(s) set /
531	/ This should not happen as we check options before passing them*
532	* to pcre_compile2() */
533	g_warning ("unknown option bit(s) set");
534	*errcode = G_REGEX_ERROR_COMPILE;
535	break;
536	case `132`: / this version of PCRE is compiled without UTF support /
537	case `144`: / invalid UTF-8 string /
538	case `145`: / support for \\P, \\p, and \\X has not been compiled /
539	case `167`: / this version of PCRE is not compiled with Unicode property support /
540	case `173`: / disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /
541	case `174`: / invalid UTF-16 string /
542	/ These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE*
543	* and we do not check if strings are valid */
544	case `170`: / internal error: unknown opcode in find_fixedlength() /
545	*errcode = G_REGEX_ERROR_INTERNAL;
546	break;
547
548	default:
549	*errcode = G_REGEX_ERROR_COMPILE;
550	}
551	}
552
553	/* GMatchInfo */
554
555	static GMatchInfo *
556	match_info_new (const GRegex *regex,
557	const gchar *string,
558	gint string_len,
559	gint start_position,
560	gint match_options,
561	gboolean is_dfa)
562	{
563	GMatchInfo *match_info;
564
565	if (string_len < `0`)
566	string_len = strlen (s: string);
567
568	match_info = g_new0 (GMatchInfo, `1`);
569	match_info->ref_count = `1`;
570	match_info->regex = g_regex_ref (regex: (GRegex *)regex);
571	match_info->string = string;
572	match_info->string_len = string_len;
573	match_info->matches = PCRE_ERROR_NOMATCH;
574	match_info->pos = start_position;
575	match_info->match_opts = match_options;
576
577	if (is_dfa)
578	{
579	/ These values should be enough for most cases, if they are not*
580	* enough g_regex_match_all_full() will expand them. */
581	match_info->n_offsets = `24`;
582	match_info->n_workspace = `100`;
583	match_info->workspace = g_new (gint, match_info->n_workspace);
584	}
585	else
586	{
587	gint capture_count;
588	pcre_fullinfo (regex->pcre_re, regex->extra,
589	PCRE_INFO_CAPTURECOUNT, &capture_count);
590	match_info->n_offsets = (capture_count + `1`) * `3`;
591	}
592
593	match_info->offsets = g_new0 (gint, match_info->n_offsets);
594	/ Set an invalid position for the previous match. /
595	match_info->offsets[`0`] = -`1`;
596	match_info->offsets[`1`] = -`1`;
597
598	return match_info;
599	}
600
601	/**
602	* g_match_info_get_regex:
603	* @match_info: a #GMatchInfo
604	*
605	* Returns #GRegex object used in @match_info. It belongs to Glib
606	* and must not be freed. Use g_regex_ref() if you need to keep it
607	* after you free @match_info object.
608	*
609	* Returns: #GRegex object used in @match_info
610	*
611	* Since: 2.14
612	*/
613	GRegex *
614	g_match_info_get_regex (const GMatchInfo *match_info)
615	{
616	g_return_val_if_fail (match_info != NULL, NULL);
617	return match_info->regex;
618	}
619
620	/**
621	* g_match_info_get_string:
622	* @match_info: a #GMatchInfo
623	*
624	* Returns the string searched with @match_info. This is the
625	* string passed to g_regex_match() or g_regex_replace() so
626	* you may not free it before calling this function.
627	*
628	* Returns: the string searched with @match_info
629	*
630	* Since: 2.14
631	*/
632	const gchar *
633	g_match_info_get_string (const GMatchInfo *match_info)
634	{
635	g_return_val_if_fail (match_info != NULL, NULL);
636	return match_info->string;
637	}
638
639	/**
640	* g_match_info_ref:
641	* @match_info: a #GMatchInfo
642	*
643	* Increases reference count of @match_info by 1.
644	*
645	* Returns: @match_info
646	*
647	* Since: 2.30
648	*/
649	GMatchInfo *
650	g_match_info_ref (GMatchInfo *match_info)
651	{
652	g_return_val_if_fail (match_info != NULL, NULL);
653	g_atomic_int_inc (&match_info->ref_count);
654	return match_info;
655	}
656
657	/**
658	* g_match_info_unref:
659	* @match_info: a #GMatchInfo
660	*
661	* Decreases reference count of @match_info by 1. When reference count drops
662	* to zero, it frees all the memory associated with the match_info structure.
663	*
664	* Since: 2.30
665	*/
666	void
667	g_match_info_unref (GMatchInfo *match_info)
668	{
669	if (g_atomic_int_dec_and_test (&match_info->ref_count))
670	{
671	g_regex_unref (regex: match_info->regex);
672	g_free (mem: match_info->offsets);
673	g_free (mem: match_info->workspace);
674	g_free (mem: match_info);
675	}
676	}
677
678	/**
679	* g_match_info_free:
680	* @match_info: (nullable): a #GMatchInfo, or %NULL
681	*
682	* If @match_info is not %NULL, calls g_match_info_unref(); otherwise does
683	* nothing.
684	*
685	* Since: 2.14
686	*/
687	void
688	g_match_info_free (GMatchInfo *match_info)
689	{
690	if (match_info == NULL)
691	return;
692
693	g_match_info_unref (match_info);
694	}
695
696	/**
697	* g_match_info_next:
698	* @match_info: a #GMatchInfo structure
699	* @error: location to store the error occurring, or %NULL to ignore errors
700	*
701	* Scans for the next match using the same parameters of the previous
702	* call to g_regex_match_full() or g_regex_match() that returned
703	* @match_info.
704	*
705	* The match is done on the string passed to the match function, so you
706	* cannot free it before calling this function.
707	*
708	* Returns: %TRUE is the string matched, %FALSE otherwise
709	*
710	* Since: 2.14
711	*/
712	gboolean
713	g_match_info_next (GMatchInfo *match_info,
714	GError **error)
715	{
716	gint prev_match_start;
717	gint prev_match_end;
718
719	g_return_val_if_fail (match_info != NULL, FALSE);
720	g_return_val_if_fail (error == NULL \|\| *error == NULL, FALSE);
721	g_return_val_if_fail (match_info->pos >= `0`, FALSE);
722
723	prev_match_start = match_info->offsets[`0`];
724	prev_match_end = match_info->offsets[`1`];
725
726	if (match_info->pos > match_info->string_len)
727	{
728	/ we have reached the end of the string /
729	match_info->pos = -`1`;
730	match_info->matches = PCRE_ERROR_NOMATCH;
731	return FALSE;
732	}
733
734	match_info->matches = pcre_exec (match_info->regex->pcre_re,
735	match_info->regex->extra,
736	match_info->string,
737	match_info->string_len,
738	match_info->pos,
739	match_info->regex->match_opts \| match_info->match_opts,
740	match_info->offsets,
741	match_info->n_offsets);
742	if (IS_PCRE_ERROR (match_info->matches))
743	{
744	g_set_error (err: error, G_REGEX_ERROR, code: G_REGEX_ERROR_MATCH,
745	_("Error while matching regular expression %s: %s"),
746	match_info->regex->pattern, match_error (errcode: match_info->matches));
747	return FALSE;
748	}
749
750	/ avoid infinite loops if the pattern is an empty string or something*
751	* equivalent */
752	if (match_info->pos == match_info->offsets[`1`])
753	{
754	if (match_info->pos > match_info->string_len)
755	{
756	/ we have reached the end of the string /
757	match_info->pos = -`1`;
758	match_info->matches = PCRE_ERROR_NOMATCH;
759	return FALSE;
760	}
761
762	match_info->pos = NEXT_CHAR (match_info->regex,
763	&match_info->string[match_info->pos]) -
764	match_info->string;
765	}
766	else
767	{
768	match_info->pos = match_info->offsets[`1`];
769	}
770
771	/ it's possible to get two identical matches when we are matching*
772	* empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
773	* the string is "RegExTest" we have:
774	* - search at position 0: match from 0 to 0
775	* - search at position 1: match from 3 to 3
776	* - search at position 3: match from 3 to 3 (duplicate)
777	* - search at position 4: match from 5 to 5
778	* - search at position 5: match from 5 to 5 (duplicate)
779	* - search at position 6: no match -> stop
780	* so we have to ignore the duplicates.
781	* see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */
782	if (match_info->matches >= `0` &&
783	prev_match_start == match_info->offsets[`0`] &&
784	prev_match_end == match_info->offsets[`1`])
785	{
786	/ ignore this match and search the next one /
787	return g_match_info_next (match_info, error);
788	}
789
790	return match_info->matches >= `0`;
791	}
792
793	/**
794	* g_match_info_matches:
795	* @match_info: a #GMatchInfo structure
796	*
797	* Returns whether the previous match operation succeeded.
798	*
799	* Returns: %TRUE if the previous match operation succeeded,
800	* %FALSE otherwise
801	*
802	* Since: 2.14
803	*/
804	gboolean
805	g_match_info_matches (const GMatchInfo *match_info)
806	{
807	g_return_val_if_fail (match_info != NULL, FALSE);
808
809	return match_info->matches >= `0`;
810	}
811
812	/**
813	* g_match_info_get_match_count:
814	* @match_info: a #GMatchInfo structure
815	*
816	* Retrieves the number of matched substrings (including substring 0,
817	* that is the whole matched text), so 1 is returned if the pattern
818	* has no substrings in it and 0 is returned if the match failed.
819	*
820	* If the last match was obtained using the DFA algorithm, that is
821	* using g_regex_match_all() or g_regex_match_all_full(), the retrieved
822	* count is not that of the number of capturing parentheses but that of
823	* the number of matched substrings.
824	*
825	* Returns: Number of matched substrings, or -1 if an error occurred
826	*
827	* Since: 2.14
828	*/
829	gint
830	g_match_info_get_match_count (const GMatchInfo *match_info)
831	{
832	g_return_val_if_fail (match_info, -`1`);
833
834	if (match_info->matches == PCRE_ERROR_NOMATCH)
835	/ no match /
836	return `0`;
837	else if (match_info->matches < PCRE_ERROR_NOMATCH)
838	/ error /
839	return -`1`;
840	else
841	/ match /
842	return match_info->matches;
843	}
844
845	/**
846	* g_match_info_is_partial_match:
847	* @match_info: a #GMatchInfo structure
848	*
849	* Usually if the string passed to g_regex_match*() matches as far as
850	* it goes, but is too short to match the entire pattern, %FALSE is
851	* returned. There are circumstances where it might be helpful to
852	* distinguish this case from other cases in which there is no match.
853	*
854	* Consider, for example, an application where a human is required to
855	* type in data for a field with specific formatting requirements. An
856	* example might be a date in the form ddmmmyy, defined by the pattern
 * "^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$".
858	* If the application sees the user’s keystrokes one by one, and can
859	* check that what has been typed so far is potentially valid, it is
860	* able to raise an error as soon as a mistake is made.
861	*
862	* GRegex supports the concept of partial matching by means of the
863	* #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags.
864	* When they are used, the return code for
865	* g_regex_match() or g_regex_match_full() is, as usual, %TRUE
866	* for a complete match, %FALSE otherwise. But, when these functions
867	* return %FALSE, you can check if the match was partial calling
868	* g_match_info_is_partial_match().
869	*
870	* The difference between #G_REGEX_MATCH_PARTIAL_SOFT and
871	* #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered
872	* with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a
873	* possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching
874	* stops at the partial match.
875	* When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD
876	* are set, the latter takes precedence.
877	*
878	* There were formerly some restrictions on the pattern for partial matching.
879	* The restrictions no longer apply.
880	*
881	* See pcrepartial(3) for more information on partial matching.
882	*
883	* Returns: %TRUE if the match was partial, %FALSE otherwise
884	*
885	* Since: 2.14
886	*/
887	gboolean
888	g_match_info_is_partial_match (const GMatchInfo *match_info)
889	{
890	g_return_val_if_fail (match_info != NULL, FALSE);
891
892	return match_info->matches == PCRE_ERROR_PARTIAL;
893	}
894
895	/**
896	* g_match_info_expand_references:
897	* @match_info: (nullable): a #GMatchInfo or %NULL
898	* @string_to_expand: the string to expand
899	* @error: location to store the error occurring, or %NULL to ignore errors
900	*
901	* Returns a new string containing the text in @string_to_expand with
902	* references and escape sequences expanded. References refer to the last
903	* match done with @string against @regex and have the same syntax used by
904	* g_regex_replace().
905	*
906	* The @string_to_expand must be UTF-8 encoded even if #G_REGEX_RAW was
907	* passed to g_regex_new().
908	*
909	* The backreferences are extracted from the string passed to the match
910	* function, so you cannot call this function after freeing the string.
911	*
912	* @match_info may be %NULL in which case @string_to_expand must not
913	* contain references. For instance "foo\n" does not refer to an actual
914	* pattern and '\n' merely will be replaced with \n character,
915	* while to expand "\0" (whole match) one needs the result of a match.
916	* Use g_regex_check_replacement() to find out whether @string_to_expand
917	* contains references.
918	*
919	* Returns: (nullable): the expanded string, or %NULL if an error occurred
920	*
921	* Since: 2.14
922	*/
923	gchar *
924	g_match_info_expand_references (const GMatchInfo *match_info,
925	const gchar *string_to_expand,
926	GError **error)
927	{
928	GString *result;
929	GList *list;
930	GError *tmp_error = NULL;
931
932	g_return_val_if_fail (string_to_expand != NULL, NULL);
933	g_return_val_if_fail (error == NULL \|\| *error == NULL, NULL);
934
935	list = split_replacement (replacement: string_to_expand, error: &tmp_error);
936	if (tmp_error != NULL)
937	{
938	g_propagate_error (dest: error, src: tmp_error);
939	return NULL;
940	}
941
942	if (!match_info && interpolation_list_needs_match (list))
943	{
944	g_critical ("String '%s' contains references to the match, can't "
945	"expand references without GMatchInfo object",
946	string_to_expand);
947	return NULL;
948	}
949
950	result = g_string_sized_new (dfl_size: strlen (s: string_to_expand));
951	interpolate_replacement (match_info, result, data: list);
952
953	g_list_free_full (list, free_func: (GDestroyNotify) free_interpolation_data);
954
955	return g_string_free (string: result, FALSE);
956	}
957
958	/**
959	* g_match_info_fetch:
960	* @match_info: #GMatchInfo structure
961	* @match_num: number of the sub expression
962	*
963	* Retrieves the text matching the @match_num'th capturing
964	* parentheses. 0 is the full text of the match, 1 is the first paren
965	* set, 2 the second, and so on.
966	*
967	* If @match_num is a valid sub pattern but it didn't match anything
968	* (e.g. sub pattern 1, matching "b" against "(a)?b") then an empty
969	* string is returned.
970	*
971	* If the match was obtained using the DFA algorithm, that is using
972	* g_regex_match_all() or g_regex_match_all_full(), the retrieved
973	* string is not that of a set of parentheses but that of a matched
974	* substring. Substrings are matched in reverse order of length, so
975	* 0 is the longest match.
976	*
977	* The string is fetched from the string passed to the match function,
978	* so you cannot call this function after freeing the string.
979	*
980	* Returns: (nullable): The matched substring, or %NULL if an error
981	* occurred. You have to free the string yourself
982	*
983	* Since: 2.14
984	*/
985	gchar *
986	g_match_info_fetch (const GMatchInfo *match_info,
987	gint match_num)
988	{
989	/ we cannot use pcre_get_substring() because it allocates the*
990	* string using pcre_malloc(). */
991	gchar *match = NULL;
992	gint start, end;
993
994	g_return_val_if_fail (match_info != NULL, NULL);
995	g_return_val_if_fail (match_num >= `0`, NULL);
996
997	/ match_num does not exist or it didn't matched, i.e. matching "b"*
998	* against "(a)?b" then group 0 is empty. */
999	if (!g_match_info_fetch_pos (match_info, match_num, start_pos: &start, end_pos: &end))
1000	match = NULL;
1001	else if (start == -`1`)
1002	match = g_strdup (str: "");
1003	else
1004	match = g_strndup (str: &match_info->string[start], n: end - start);
1005
1006	return match;
1007	}
1008
1009	/**
1010	* g_match_info_fetch_pos:
1011	* @match_info: #GMatchInfo structure
1012	* @match_num: number of the sub expression
1013	* @start_pos: (out) (optional): pointer to location where to store
1014	* the start position, or %NULL
1015	* @end_pos: (out) (optional): pointer to location where to store
1016	* the end position, or %NULL
1017	*
1018	* Retrieves the position in bytes of the @match_num'th capturing
1019	* parentheses. 0 is the full text of the match, 1 is the first
1020	* paren set, 2 the second, and so on.
1021	*
1022	* If @match_num is a valid sub pattern but it didn't match anything
1023	* (e.g. sub pattern 1, matching "b" against "(a)?b") then @start_pos
1024	* and @end_pos are set to -1 and %TRUE is returned.
1025	*
1026	* If the match was obtained using the DFA algorithm, that is using
1027	* g_regex_match_all() or g_regex_match_all_full(), the retrieved
1028	* position is not that of a set of parentheses but that of a matched
1029	* substring. Substrings are matched in reverse order of length, so
1030	* 0 is the longest match.
1031	*
1032	* Returns: %TRUE if the position was fetched, %FALSE otherwise. If
1033	* the position cannot be fetched, @start_pos and @end_pos are left
1034	* unchanged
1035	*
1036	* Since: 2.14
1037	*/
1038	gboolean
1039	g_match_info_fetch_pos (const GMatchInfo *match_info,
1040	gint match_num,
1041	gint *start_pos,
1042	gint *end_pos)
1043	{
1044	g_return_val_if_fail (match_info != NULL, FALSE);
1045	g_return_val_if_fail (match_num >= `0`, FALSE);
1046
1047	/ make sure the sub expression number they're requesting is less than*
1048	* the total number of sub expressions that were matched. */
1049	if (match_num >= match_info->matches)
1050	return FALSE;
1051
1052	if (start_pos != NULL)
1053	start_pos = match_info->offsets[`2` match_num];
1054
1055	if (end_pos != NULL)
1056	end_pos = match_info->offsets[`2` match_num + `1`];
1057
1058	return TRUE;
1059	}
1060
1061	/*
1062	* Returns number of first matched subpattern with name @name.
1063	* There may be more than one in case when DUPNAMES is used,
1064	* and not all subpatterns with that name match;
1065	* pcre_get_stringnumber() does not work in that case.
1066	*/
1067	static gint
1068	get_matched_substring_number (const GMatchInfo *match_info,
1069	const gchar *name)
1070	{
1071	gint entrysize;
1072	gchar first, last;
1073	guchar *entry;
1074
1075	if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES))
1076	return pcre_get_stringnumber (match_info->regex->pcre_re, name);
1077
1078	/ This code is copied from pcre_get.c: get_first_set() /
1079	entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re,
1080	name,
1081	&first,
1082	&last);
1083
1084	if (entrysize <= `0`)
1085	return entrysize;
1086
1087	for (entry = (guchar) first; entry <= (guchar) last; entry += entrysize)
1088	{
1089	gint n = (entry[`0`] << `8`) + entry[`1`];
1090	if (match_info->offsets[n*`2`] >= `0`)
1091	return n;
1092	}
1093
1094	return (first[`0`] << `8`) + first[`1`];
1095	}
1096
1097	/**
1098	* g_match_info_fetch_named:
1099	* @match_info: #GMatchInfo structure
1100	* @name: name of the subexpression
1101	*
1102	* Retrieves the text matching the capturing parentheses named @name.
1103	*
1104	* If @name is a valid sub pattern name but it didn't match anything
1105	* (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b")
1106	* then an empty string is returned.
1107	*
1108	* The string is fetched from the string passed to the match function,
1109	* so you cannot call this function after freeing the string.
1110	*
1111	* Returns: (nullable): The matched substring, or %NULL if an error
1112	* occurred. You have to free the string yourself
1113	*
1114	* Since: 2.14
1115	*/
1116	gchar *
1117	g_match_info_fetch_named (const GMatchInfo *match_info,
1118	const gchar *name)
1119	{
1120	/ we cannot use pcre_get_named_substring() because it allocates the*
1121	* string using pcre_malloc(). */
1122	gint num;
1123
1124	g_return_val_if_fail (match_info != NULL, NULL);
1125	g_return_val_if_fail (name != NULL, NULL);
1126
1127	num = get_matched_substring_number (match_info, name);
1128	if (num < `0`)
1129	return NULL;
1130	else
1131	return g_match_info_fetch (match_info, match_num: num);
1132	}
1133
1134	/**
1135	* g_match_info_fetch_named_pos:
1136	* @match_info: #GMatchInfo structure
1137	* @name: name of the subexpression
1138	* @start_pos: (out) (optional): pointer to location where to store
1139	* the start position, or %NULL
1140	* @end_pos: (out) (optional): pointer to location where to store
1141	* the end position, or %NULL
1142	*
1143	* Retrieves the position in bytes of the capturing parentheses named @name.
1144	*
1145	* If @name is a valid sub pattern name but it didn't match anything
1146	* (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b")
1147	* then @start_pos and @end_pos are set to -1 and %TRUE is returned.
1148	*
1149	* Returns: %TRUE if the position was fetched, %FALSE otherwise.
1150	* If the position cannot be fetched, @start_pos and @end_pos
1151	* are left unchanged.
1152	*
1153	* Since: 2.14
1154	*/
1155	gboolean
1156	g_match_info_fetch_named_pos (const GMatchInfo *match_info,
1157	const gchar *name,
1158	gint *start_pos,
1159	gint *end_pos)
1160	{
1161	gint num;
1162
1163	g_return_val_if_fail (match_info != NULL, FALSE);
1164	g_return_val_if_fail (name != NULL, FALSE);
1165
1166	num = get_matched_substring_number (match_info, name);
1167	if (num < `0`)
1168	return FALSE;
1169
1170	return g_match_info_fetch_pos (match_info, match_num: num, start_pos, end_pos);
1171	}
1172
1173	/**
1174	* g_match_info_fetch_all:
1175	* @match_info: a #GMatchInfo structure
1176	*
1177	* Bundles up pointers to each of the matching substrings from a match
1178	* and stores them in an array of gchar pointers. The first element in
1179	* the returned array is the match number 0, i.e. the entire matched
1180	* text.
1181	*
1182	* If a sub pattern didn't match anything (e.g. sub pattern 1, matching
1183	* "b" against "(a)?b") then an empty string is inserted.
1184	*
1185	* If the last match was obtained using the DFA algorithm, that is using
1186	* g_regex_match_all() or g_regex_match_all_full(), the retrieved
1187	* strings are not that matched by sets of parentheses but that of the
1188	* matched substring. Substrings are matched in reverse order of length,
1189	* so the first one is the longest match.
1190	*
1191	* The strings are fetched from the string passed to the match function,
1192	* so you cannot call this function after freeing the string.
1193	*
1194	* Returns: (transfer full): a %NULL-terminated array of gchar *
1195	* pointers. It must be freed using g_strfreev(). If the previous
1196	* match failed %NULL is returned
1197	*
1198	* Since: 2.14
1199	*/
1200	gchar **
1201	g_match_info_fetch_all (const GMatchInfo *match_info)
1202	{
1203	/ we cannot use pcre_get_substring_list() because the returned value*
1204	* isn't suitable for g_strfreev(). */
1205	gchar **result;
1206	gint i;
1207
1208	g_return_val_if_fail (match_info != NULL, NULL);
1209
1210	if (match_info->matches < `0`)
1211	return NULL;
1212
1213	result = g_new (gchar *, match_info->matches + `1`);
1214	for (i = `0`; i < match_info->matches; i++)
1215	result[i] = g_match_info_fetch (match_info, match_num: i);
1216	result[i] = NULL;
1217
1218	return result;
1219	}
1220
1221
/* GRegex */

/* Per the GLib documentation, G_DEFINE_QUARK() defines a function named
 * after its second argument plus "_quark" -- here g_regex_error_quark() --
 * returning the GQuark for the string given as first argument.
 * NOTE(review): presumably this is what the G_REGEX_ERROR domain used in
 * this file expands to; that macro is defined outside this section. */
G_DEFINE_QUARK (g-regex-error-quark, g_regex_error)
1225
1226	/**
1227	* g_regex_ref:
1228	* @regex: a #GRegex
1229	*
1230	* Increases reference count of @regex by 1.
1231	*
1232	* Returns: @regex
1233	*
1234	* Since: 2.14
1235	*/
1236	GRegex *
1237	g_regex_ref (GRegex *regex)
1238	{
1239	g_return_val_if_fail (regex != NULL, NULL);
1240	g_atomic_int_inc (&regex->ref_count);
1241	return regex;
1242	}
1243
1244	/**
1245	* g_regex_unref:
1246	* @regex: a #GRegex
1247	*
1248	* Decreases reference count of @regex by 1. When reference count drops
1249	* to zero, it frees all the memory associated with the regex structure.
1250	*
1251	* Since: 2.14
1252	*/
1253	void
1254	g_regex_unref (GRegex *regex)
1255	{
1256	g_return_if_fail (regex != NULL);
1257
1258	if (g_atomic_int_dec_and_test (&regex->ref_count))
1259	{
1260	g_free (mem: regex->pattern);
1261	if (regex->pcre_re != NULL)
1262	pcre_free (regex->pcre_re);
1263	if (regex->extra != NULL)
1264	pcre_free (regex->extra);
1265	g_free (mem: regex);
1266	}
1267	}
1268
1269	/*
1270	* @match_options: (inout) (optional):
1271	*/
1272	static pcre regex_compile (const* gchar *pattern,
1273	GRegexCompileFlags compile_options,
1274	GRegexCompileFlags *compile_options_out,
1275	GRegexMatchFlags *match_options,
1276	GError **error);
1277
1278	/**
1279	* g_regex_new:
1280	* @pattern: the regular expression
1281	* @compile_options: compile options for the regular expression, or 0
1282	* @match_options: match options for the regular expression, or 0
1283	* @error: return location for a #GError
1284	*
1285	* Compiles the regular expression to an internal form, and does
1286	* the initial setup of the #GRegex structure.
1287	*
1288	* Returns: (nullable): a #GRegex structure or %NULL if an error occurred. Call
1289	* g_regex_unref() when you are done with it
1290	*
1291	* Since: 2.14
1292	*/
1293	GRegex *
1294	g_regex_new (const gchar *pattern,
1295	GRegexCompileFlags compile_options,
1296	GRegexMatchFlags match_options,
1297	GError **error)
1298	{
1299	GRegex *regex;
1300	pcre *re;
1301	const gchar *errmsg;
1302	gboolean optimize = FALSE;
1303	static gsize initialised = `0`;
1304
1305	g_return_val_if_fail (pattern != NULL, NULL);
1306	g_return_val_if_fail (error == NULL \|\| *error == NULL, NULL);
1307	g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == `0`, NULL);
1308	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, NULL);
1309
1310	if (g_once_init_enter (&initialised))
1311	{
1312	int supports_utf8, supports_ucp;
1313
1314	pcre_config (PCRE_CONFIG_UTF8, &supports_utf8);
1315	if (!supports_utf8)
1316	g_critical (_("PCRE library is compiled without UTF8 support"));
1317
1318	pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp);
1319	if (!supports_ucp)
1320	g_critical (_("PCRE library is compiled without UTF8 properties support"));
1321
1322	g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? `1` : `2`);
1323	}
1324
1325	if (G_UNLIKELY (initialised != `1`))
1326	{
1327	g_set_error_literal (err: error, G_REGEX_ERROR, code: G_REGEX_ERROR_COMPILE,
1328	_("PCRE library is compiled with incompatible options"));
1329	return NULL;
1330	}
1331
1332	/ G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK,*
1333	* as we do not need to wrap PCRE_NO_UTF8_CHECK. */
1334	if (compile_options & G_REGEX_OPTIMIZE)
1335	optimize = TRUE;
1336
1337	re = regex_compile (pattern, compile_options, compile_options_out: &compile_options,
1338	match_options: &match_options, error);
1339
1340	if (re == NULL)
1341	return NULL;
1342
1343	regex = g_new0 (GRegex, `1`);
1344	regex->ref_count = `1`;
1345	regex->pattern = g_strdup (str: pattern);
1346	regex->pcre_re = re;
1347	regex->compile_opts = compile_options;
1348	regex->match_opts = match_options;
1349
1350	if (optimize)
1351	{
1352	regex->extra = pcre_study (regex->pcre_re, `0`, &errmsg);
1353	if (errmsg != NULL)
1354	{
1355	GError *tmp_error = g_error_new (G_REGEX_ERROR,
1356	code: G_REGEX_ERROR_OPTIMIZE,
1357	_("Error while optimizing "
1358	"regular expression %s: %s"),
1359	regex->pattern,
1360	errmsg);
1361	g_propagate_error (dest: error, src: tmp_error);
1362
1363	g_regex_unref (regex);
1364	return NULL;
1365	}
1366	}
1367
1368	return regex;
1369	}
1370
1371	static pcre *
1372	regex_compile (const gchar *pattern,
1373	GRegexCompileFlags compile_options,
1374	GRegexCompileFlags *compile_options_out,
1375	GRegexMatchFlags *match_options,
1376	GError **error)
1377	{
1378	pcre *re;
1379	const gchar *errmsg;
1380	gint erroffset;
1381	gint errcode;
1382	GRegexCompileFlags nonpcre_compile_options;
1383	unsigned long int pcre_compile_options;
1384
1385	nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
1386
1387	/ In GRegex the string are, by default, UTF-8 encoded. PCRE*
1388	* instead uses UTF-8 only if required with PCRE_UTF8. */
1389	if (compile_options & G_REGEX_RAW)
1390	{
1391	/ disable utf-8 /
1392	compile_options &= ~G_REGEX_RAW;
1393	}
1394	else
1395	{
1396	/ enable utf-8 /
1397	compile_options \|= PCRE_UTF8 \| PCRE_NO_UTF8_CHECK;
1398
1399	if (match_options != NULL)
1400	*match_options \|= PCRE_NO_UTF8_CHECK;
1401	}
1402
1403	/ PCRE_NEWLINE_ANY is the default for the internal PCRE but*
1404	* not for the system one. */
1405	if (!(compile_options & G_REGEX_NEWLINE_CR) &&
1406	!(compile_options & G_REGEX_NEWLINE_LF))
1407	{
1408	compile_options \|= PCRE_NEWLINE_ANY;
1409	}
1410
1411	compile_options \|= PCRE_UCP;
1412
1413	/ PCRE_BSR_UNICODE is the default for the internal PCRE but*
1414	* possibly not for the system one.
1415	*/
1416	if (~compile_options & G_REGEX_BSR_ANYCRLF)
1417	compile_options \|= PCRE_BSR_UNICODE;
1418
1419	/ compile the pattern /
1420	re = pcre_compile2 (pattern, compile_options, &errcode,
1421	&errmsg, &erroffset, NULL);
1422
1423	/ if the compilation failed, set the error member and return*
1424	* immediately */
1425	if (re == NULL)
1426	{
1427	GError *tmp_error;
1428
1429	/ Translate the PCRE error code to GRegexError and use a translated*
1430	* error message if possible */
1431	translate_compile_error (errcode: &errcode, errmsg: &errmsg);
1432
1433	/ PCRE uses byte offsets but we want to show character offsets /
1434	erroffset = g_utf8_pointer_to_offset (str: pattern, pos: &pattern[erroffset]);
1435
1436	tmp_error = g_error_new (G_REGEX_ERROR, code: errcode,
1437	_("Error while compiling regular "
1438	"expression %s at char %d: %s"),
1439	pattern, erroffset, errmsg);
1440	g_propagate_error (dest: error, src: tmp_error);
1441
1442	return NULL;
1443	}
1444
1445	/ For options set at the beginning of the pattern, pcre puts them into*
1446	* compile options, e.g. "(?i)foo" will make the pcre structure store
1447	* PCRE_CASELESS even though it wasn't explicitly given for compilation. */
1448	pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options);
1449	compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
1450
1451	/ Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF /
1452	if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF)
1453	compile_options &= ~PCRE_NEWLINE_ANY;
1454
1455	compile_options \|= nonpcre_compile_options;
1456
1457	if (!(compile_options & G_REGEX_DUPNAMES))
1458	{
1459	gboolean jchanged = FALSE;
1460	pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1461	if (jchanged)
1462	compile_options \|= G_REGEX_DUPNAMES;
1463	}
1464
1465	if (compile_options_out != `0`)
1466	*compile_options_out = compile_options;
1467
1468	return re;
1469	}
1470
1471	/**
1472	* g_regex_get_pattern:
1473	* @regex: a #GRegex structure
1474	*
1475	* Gets the pattern string associated with @regex, i.e. a copy of
1476	* the string passed to g_regex_new().
1477	*
1478	* Returns: the pattern of @regex
1479	*
1480	* Since: 2.14
1481	*/
1482	const gchar *
1483	g_regex_get_pattern (const GRegex *regex)
1484	{
1485	g_return_val_if_fail (regex != NULL, NULL);
1486
1487	return regex->pattern;
1488	}
1489
1490	/**
1491	* g_regex_get_max_backref:
1492	* @regex: a #GRegex
1493	*
1494	* Returns the number of the highest back reference
1495	* in the pattern, or 0 if the pattern does not contain
1496	* back references.
1497	*
1498	* Returns: the number of the highest back reference
1499	*
1500	* Since: 2.14
1501	*/
1502	gint
1503	g_regex_get_max_backref (const GRegex *regex)
1504	{
1505	gint value;
1506
1507	pcre_fullinfo (regex->pcre_re, regex->extra,
1508	PCRE_INFO_BACKREFMAX, &value);
1509
1510	return value;
1511	}
1512
1513	/**
1514	* g_regex_get_capture_count:
1515	* @regex: a #GRegex
1516	*
1517	* Returns the number of capturing subpatterns in the pattern.
1518	*
1519	* Returns: the number of capturing subpatterns
1520	*
1521	* Since: 2.14
1522	*/
1523	gint
1524	g_regex_get_capture_count (const GRegex *regex)
1525	{
1526	gint value;
1527
1528	pcre_fullinfo (regex->pcre_re, regex->extra,
1529	PCRE_INFO_CAPTURECOUNT, &value);
1530
1531	return value;
1532	}
1533
1534	/**
1535	* g_regex_get_has_cr_or_lf:
1536	* @regex: a #GRegex structure
1537	*
1538	* Checks whether the pattern contains explicit CR or LF references.
1539	*
1540	* Returns: %TRUE if the pattern contains explicit CR or LF references
1541	*
1542	* Since: 2.34
1543	*/
1544	gboolean
1545	g_regex_get_has_cr_or_lf (const GRegex *regex)
1546	{
1547	gint value;
1548
1549	pcre_fullinfo (regex->pcre_re, regex->extra,
1550	PCRE_INFO_HASCRORLF, &value);
1551
1552	return !!value;
1553	}
1554
1555	/**
1556	* g_regex_get_max_lookbehind:
1557	* @regex: a #GRegex structure
1558	*
1559	* Gets the number of characters in the longest lookbehind assertion in the
1560	* pattern. This information is useful when doing multi-segment matching using
1561	* the partial matching facilities.
1562	*
1563	* Returns: the number of characters in the longest lookbehind assertion.
1564	*
1565	* Since: 2.38
1566	*/
1567	gint
1568	g_regex_get_max_lookbehind (const GRegex *regex)
1569	{
1570	gint max_lookbehind;
1571
1572	pcre_fullinfo (regex->pcre_re, regex->extra,
1573	PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind);
1574
1575	return max_lookbehind;
1576	}
1577
1578	/**
1579	* g_regex_get_compile_flags:
1580	* @regex: a #GRegex
1581	*
1582	* Returns the compile options that @regex was created with.
1583	*
1584	* Depending on the version of PCRE that is used, this may or may not
1585	* include flags set by option expressions such as `(?i)` found at the
1586	* top-level within the compiled pattern.
1587	*
1588	* Returns: flags from #GRegexCompileFlags
1589	*
1590	* Since: 2.26
1591	*/
1592	GRegexCompileFlags
1593	g_regex_get_compile_flags (const GRegex *regex)
1594	{
1595	g_return_val_if_fail (regex != NULL, `0`);
1596
1597	return regex->compile_opts;
1598	}
1599
1600	/**
1601	* g_regex_get_match_flags:
1602	* @regex: a #GRegex
1603	*
1604	* Returns the match options that @regex was created with.
1605	*
1606	* Returns: flags from #GRegexMatchFlags
1607	*
1608	* Since: 2.26
1609	*/
1610	GRegexMatchFlags
1611	g_regex_get_match_flags (const GRegex *regex)
1612	{
1613	g_return_val_if_fail (regex != NULL, `0`);
1614
1615	return regex->match_opts & G_REGEX_MATCH_MASK;
1616	}
1617
1618	/**
1619	* g_regex_match_simple:
1620	* @pattern: the regular expression
1621	* @string: the string to scan for matches
1622	* @compile_options: compile options for the regular expression, or 0
1623	* @match_options: match options, or 0
1624	*
1625	* Scans for a match in @string for @pattern.
1626	*
1627	* This function is equivalent to g_regex_match() but it does not
1628	* require to compile the pattern with g_regex_new(), avoiding some
1629	* lines of code when you need just to do a match without extracting
1630	* substrings, capture counts, and so on.
1631	*
1632	* If this function is to be called on the same @pattern more than
1633	* once, it's more efficient to compile the pattern once with
1634	* g_regex_new() and then use g_regex_match().
1635	*
1636	* Returns: %TRUE if the string matched, %FALSE otherwise
1637	*
1638	* Since: 2.14
1639	*/
1640	gboolean
1641	g_regex_match_simple (const gchar *pattern,
1642	const gchar *string,
1643	GRegexCompileFlags compile_options,
1644	GRegexMatchFlags match_options)
1645	{
1646	GRegex *regex;
1647	gboolean result;
1648
1649	regex = g_regex_new (pattern, compile_options, match_options: `0`, NULL);
1650	if (!regex)
1651	return FALSE;
1652	result = g_regex_match_full (regex, string, string_len: -`1`, start_position: `0`, match_options, NULL, NULL);
1653	g_regex_unref (regex);
1654	return result;
1655	}
1656
1657	/**
1658	* g_regex_match:
1659	* @regex: a #GRegex structure from g_regex_new()
1660	* @string: the string to scan for matches
1661	* @match_options: match options
1662	* @match_info: (out) (optional): pointer to location where to store
1663	* the #GMatchInfo, or %NULL if you do not need it
1664	*
1665	* Scans for a match in @string for the pattern in @regex.
1666	* The @match_options are combined with the match options specified
1667	* when the @regex structure was created, letting you have more
1668	* flexibility in reusing #GRegex structures.
1669	*
1670	* Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
1671	*
1672	* A #GMatchInfo structure, used to get information on the match,
1673	* is stored in @match_info if not %NULL. Note that if @match_info
1674	* is not %NULL then it is created even if the function returns %FALSE,
1675	* i.e. you must free it regardless if regular expression actually matched.
1676	*
1677	* To retrieve all the non-overlapping matches of the pattern in
1678	* string you can use g_match_info_next().
1679	*
 * |[<!-- language="C" -->
1681	* static void
1682	* print_uppercase_words (const gchar *string)
1683	* {
1684	* // Print all uppercase-only words.
1685	* GRegex *regex;
1686	* GMatchInfo *match_info;
1687	*
1688	* regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
1689	* g_regex_match (regex, string, 0, &match_info);
1690	* while (g_match_info_matches (match_info))
1691	* {
1692	* gchar *word = g_match_info_fetch (match_info, 0);
1693	* g_print ("Found: %s\n", word);
1694	* g_free (word);
1695	* g_match_info_next (match_info, NULL);
1696	* }
1697	* g_match_info_free (match_info);
1698	* g_regex_unref (regex);
1699	* }
 * ]|
1701	*
1702	* @string is not copied and is used in #GMatchInfo internally. If
1703	* you use any #GMatchInfo method (except g_match_info_free()) after
1704	* freeing or modifying @string then the behaviour is undefined.
1705	*
 * Returns: %TRUE if the string matched, %FALSE otherwise
1707	*
1708	* Since: 2.14
1709	*/
1710	gboolean
1711	g_regex_match (const GRegex *regex,
1712	const gchar *string,
1713	GRegexMatchFlags match_options,
1714	GMatchInfo **match_info)
1715	{
1716	return g_regex_match_full (regex, string, string_len: -`1`, start_position: `0`, match_options,
1717	match_info, NULL);
1718	}
1719
1720	/**
1721	* g_regex_match_full:
1722	* @regex: a #GRegex structure from g_regex_new()
1723	* @string: (array length=string_len): the string to scan for matches
1724	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
1725	* @start_position: starting index of the string to match, in bytes
1726	* @match_options: match options
1727	* @match_info: (out) (optional): pointer to location where to store
1728	* the #GMatchInfo, or %NULL if you do not need it
1729	* @error: location to store the error occurring, or %NULL to ignore errors
1730	*
1731	* Scans for a match in @string for the pattern in @regex.
1732	* The @match_options are combined with the match options specified
1733	* when the @regex structure was created, letting you have more
1734	* flexibility in reusing #GRegex structures.
1735	*
1736	* Setting @start_position differs from just passing over a shortened
1737	* string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
1738	* that begins with any kind of lookbehind assertion, such as "\b".
1739	*
1740	* Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
1741	*
1742	* A #GMatchInfo structure, used to get information on the match, is
1743	* stored in @match_info if not %NULL. Note that if @match_info is
1744	* not %NULL then it is created even if the function returns %FALSE,
1745	* i.e. you must free it regardless if regular expression actually
1746	* matched.
1747	*
1748	* @string is not copied and is used in #GMatchInfo internally. If
1749	* you use any #GMatchInfo method (except g_match_info_free()) after
1750	* freeing or modifying @string then the behaviour is undefined.
1751	*
1752	* To retrieve all the non-overlapping matches of the pattern in
1753	* string you can use g_match_info_next().
1754	*
 * |[<!-- language="C" -->
1756	* static void
1757	* print_uppercase_words (const gchar *string)
1758	* {
1759	* // Print all uppercase-only words.
1760	* GRegex *regex;
1761	* GMatchInfo *match_info;
1762	* GError *error = NULL;
1763	*
1764	* regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
1765	* g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
1766	* while (g_match_info_matches (match_info))
1767	* {
1768	* gchar *word = g_match_info_fetch (match_info, 0);
1769	* g_print ("Found: %s\n", word);
1770	* g_free (word);
1771	* g_match_info_next (match_info, &error);
1772	* }
1773	* g_match_info_free (match_info);
1774	* g_regex_unref (regex);
1775	* if (error != NULL)
1776	* {
1777	* g_printerr ("Error while matching: %s\n", error->message);
1778	* g_error_free (error);
1779	* }
1780	* }
 * ]|
1782	*
 * Returns: %TRUE if the string matched, %FALSE otherwise
1784	*
1785	* Since: 2.14
1786	*/
1787	gboolean
1788	g_regex_match_full (const GRegex *regex,
1789	const gchar *string,
1790	gssize string_len,
1791	gint start_position,
1792	GRegexMatchFlags match_options,
1793	GMatchInfo **match_info,
1794	GError **error)
1795	{
1796	GMatchInfo *info;
1797	gboolean match_ok;
1798
1799	g_return_val_if_fail (regex != NULL, FALSE);
1800	g_return_val_if_fail (string != NULL, FALSE);
1801	g_return_val_if_fail (start_position >= `0`, FALSE);
1802	g_return_val_if_fail (error == NULL \|\| *error == NULL, FALSE);
1803	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, FALSE);
1804
1805	info = match_info_new (regex, string, string_len, start_position,
1806	match_options, FALSE);
1807	match_ok = g_match_info_next (match_info: info, error);
1808	if (match_info != NULL)
1809	*match_info = info;
1810	else
1811	g_match_info_free (match_info: info);
1812
1813	return match_ok;
1814	}
1815
1816	/**
1817	* g_regex_match_all:
1818	* @regex: a #GRegex structure from g_regex_new()
1819	* @string: the string to scan for matches
1820	* @match_options: match options
1821	* @match_info: (out) (optional): pointer to location where to store
1822	* the #GMatchInfo, or %NULL if you do not need it
1823	*
1824	* Using the standard algorithm for regular expression matching only
1825	* the longest match in the string is retrieved. This function uses
1826	* a different algorithm so it can retrieve all the possible matches.
1827	* For more documentation see g_regex_match_all_full().
1828	*
1829	* A #GMatchInfo structure, used to get information on the match, is
1830	* stored in @match_info if not %NULL. Note that if @match_info is
1831	* not %NULL then it is created even if the function returns %FALSE,
1832	* i.e. you must free it regardless if regular expression actually
1833	* matched.
1834	*
1835	* @string is not copied and is used in #GMatchInfo internally. If
1836	* you use any #GMatchInfo method (except g_match_info_free()) after
1837	* freeing or modifying @string then the behaviour is undefined.
1838	*
 * Returns: %TRUE if the string matched, %FALSE otherwise
1840	*
1841	* Since: 2.14
1842	*/
1843	gboolean
1844	g_regex_match_all (const GRegex *regex,
1845	const gchar *string,
1846	GRegexMatchFlags match_options,
1847	GMatchInfo **match_info)
1848	{
1849	return g_regex_match_all_full (regex, string, string_len: -`1`, start_position: `0`, match_options,
1850	match_info, NULL);
1851	}
1852
1853	/**
1854	* g_regex_match_all_full:
1855	* @regex: a #GRegex structure from g_regex_new()
1856	* @string: (array length=string_len): the string to scan for matches
1857	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
1858	* @start_position: starting index of the string to match, in bytes
1859	* @match_options: match options
1860	* @match_info: (out) (optional): pointer to location where to store
1861	* the #GMatchInfo, or %NULL if you do not need it
1862	* @error: location to store the error occurring, or %NULL to ignore errors
1863	*
1864	* Using the standard algorithm for regular expression matching only
1865	* the longest match in the @string is retrieved, it is not possible
1866	* to obtain all the available matches. For instance matching
1867	* "<a> <b> <c>" against the pattern "<.*>"
1868	* you get "<a> <b> <c>".
1869	*
1870	* This function uses a different algorithm (called DFA, i.e. deterministic
1871	* finite automaton), so it can retrieve all the possible matches, all
1872	* starting at the same point in the string. For instance matching
 * "<a> <b> <c>" against the pattern "<.*>"
1874	* you would obtain three matches: "<a> <b> <c>",
1875	* "<a> <b>" and "<a>".
1876	*
1877	* The number of matched strings is retrieved using
1878	* g_match_info_get_match_count(). To obtain the matched strings and
1879	* their position you can use, respectively, g_match_info_fetch() and
1880	* g_match_info_fetch_pos(). Note that the strings are returned in
1881	* reverse order of length; that is, the longest matching string is
1882	* given first.
1883	*
1884	* Note that the DFA algorithm is slower than the standard one and it
1885	* is not able to capture substrings, so backreferences do not work.
1886	*
1887	* Setting @start_position differs from just passing over a shortened
1888	* string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
1889	* that begins with any kind of lookbehind assertion, such as "\b".
1890	*
1891	* Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8.
1892	*
1893	* A #GMatchInfo structure, used to get information on the match, is
1894	* stored in @match_info if not %NULL. Note that if @match_info is
1895	* not %NULL then it is created even if the function returns %FALSE,
1896	* i.e. you must free it regardless if regular expression actually
1897	* matched.
1898	*
1899	* @string is not copied and is used in #GMatchInfo internally. If
1900	* you use any #GMatchInfo method (except g_match_info_free()) after
1901	* freeing or modifying @string then the behaviour is undefined.
1902	*
 * Returns: %TRUE if the string matched, %FALSE otherwise
1904	*
1905	* Since: 2.14
1906	*/
1907	gboolean
1908	g_regex_match_all_full (const GRegex *regex,
1909	const gchar *string,
1910	gssize string_len,
1911	gint start_position,
1912	GRegexMatchFlags match_options,
1913	GMatchInfo **match_info,
1914	GError **error)
1915	{
1916	GMatchInfo *info;
1917	gboolean done;
1918	pcre *pcre_re;
1919	pcre_extra *extra;
1920	gboolean retval;
1921
1922	g_return_val_if_fail (regex != NULL, FALSE);
1923	g_return_val_if_fail (string != NULL, FALSE);
1924	g_return_val_if_fail (start_position >= `0`, FALSE);
1925	g_return_val_if_fail (error == NULL \|\| *error == NULL, FALSE);
1926	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, FALSE);
1927
1928	#ifdef PCRE_NO_AUTO_POSSESS
1929	/ For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which*
1930	* is an optimization for normal regex matching, but results in omitting
1931	* some shorter matches here, and an observable behaviour change.
1932	*
1933	* DFA matching is rather niche, and very rarely used according to
1934	* codesearch.debian.net, so don't bother caching the recompiled RE. */
1935	pcre_re = regex_compile (pattern: regex->pattern,
1936	compile_options: regex->compile_opts \| PCRE_NO_AUTO_POSSESS,
1937	NULL, NULL, error);
1938
1939	if (pcre_re == NULL)
1940	return FALSE;
1941
1942	/ Not bothering to cache the optimization data either, with similar*
1943	* reasoning */
1944	extra = NULL;
1945	#else
1946	/ For PCRE < 8.33 the precompiled regex is fine. /
1947	pcre_re = regex->pcre_re;
1948	extra = regex->extra;
1949	#endif
1950
1951	info = match_info_new (regex, string, string_len, start_position,
1952	match_options, TRUE);
1953
1954	done = FALSE;
1955	while (!done)
1956	{
1957	done = TRUE;
1958	info->matches = pcre_dfa_exec (pcre_re, extra,
1959	info->string, info->string_len,
1960	info->pos,
1961	regex->match_opts \| match_options,
1962	info->offsets, info->n_offsets,
1963	info->workspace, info->n_workspace);
1964	if (info->matches == PCRE_ERROR_DFA_WSSIZE)
1965	{
1966	/ info->workspace is too small. /
1967	info->n_workspace *= `2`;
1968	info->workspace = g_realloc (mem: info->workspace,
1969	n_bytes: info->n_workspace * sizeof (gint));
1970	done = FALSE;
1971	}
1972	else if (info->matches == `0`)
1973	{
1974	/ info->offsets is too small. /
1975	info->n_offsets *= `2`;
1976	info->offsets = g_realloc (mem: info->offsets,
1977	n_bytes: info->n_offsets * sizeof (gint));
1978	done = FALSE;
1979	}
1980	else if (IS_PCRE_ERROR (info->matches))
1981	{
1982	g_set_error (err: error, G_REGEX_ERROR, code: G_REGEX_ERROR_MATCH,
1983	_("Error while matching regular expression %s: %s"),
1984	regex->pattern, match_error (errcode: info->matches));
1985	}
1986	}
1987
1988	#ifdef PCRE_NO_AUTO_POSSESS
1989	pcre_free (pcre_re);
1990	#endif
1991
1992	/ set info->pos to -1 so that a call to g_match_info_next() fails. /
1993	info->pos = -`1`;
1994	retval = info->matches >= `0`;
1995
1996	if (match_info != NULL)
1997	*match_info = info;
1998	else
1999	g_match_info_free (match_info: info);
2000
2001	return retval;
2002	}
2003
2004	/**
2005	* g_regex_get_string_number:
2006	* @regex: #GRegex structure
2007	* @name: name of the subexpression
2008	*
2009	* Retrieves the number of the subexpression named @name.
2010	*
 * Returns: The number of the subexpression or -1 if @name
 *   does not exist
2013	*
2014	* Since: 2.14
2015	*/
2016	gint
2017	g_regex_get_string_number (const GRegex *regex,
2018	const gchar *name)
2019	{
2020	gint num;
2021
2022	g_return_val_if_fail (regex != NULL, -`1`);
2023	g_return_val_if_fail (name != NULL, -`1`);
2024
2025	num = pcre_get_stringnumber (regex->pcre_re, name);
2026	if (num == PCRE_ERROR_NOSUBSTRING)
2027	num = -`1`;
2028
2029	return num;
2030	}
2031
2032	/**
2033	* g_regex_split_simple:
2034	* @pattern: the regular expression
2035	* @string: the string to scan for matches
2036	* @compile_options: compile options for the regular expression, or 0
2037	* @match_options: match options, or 0
2038	*
2039	* Breaks the string on the pattern, and returns an array of
2040	* the tokens. If the pattern contains capturing parentheses,
2041	* then the text for each of the substrings will also be returned.
2042	* If the pattern does not match anywhere in the string, then the
 * whole string is returned as the first token.
 *
 * This function is equivalent to g_regex_split() but it does
 * not require compiling the pattern with g_regex_new(), avoiding
2047	* some lines of code when you need just to do a split without
2048	* extracting substrings, capture counts, and so on.
2049	*
2050	* If this function is to be called on the same @pattern more than
2051	* once, it's more efficient to compile the pattern once with
2052	* g_regex_new() and then use g_regex_split().
2053	*
2054	* As a special case, the result of splitting the empty string ""
2055	* is an empty vector, not a vector containing a single string.
2056	* The reason for this special case is that being able to represent
2057	* an empty vector is typically more useful than consistent handling
2058	* of empty elements. If you do need to represent empty elements,
2059	* you'll need to check for the empty string before calling this
2060	* function.
2061	*
2062	* A pattern that can match empty strings splits @string into
2063	* separate characters wherever it matches the empty string between
2064	* characters. For example splitting "ab c" using as a separator
2065	* "\s*", you will get "a", "b" and "c".
2066	*
2067	* Returns: (transfer full): a %NULL-terminated array of strings. Free
2068	* it using g_strfreev()
2069	*
2070	* Since: 2.14
2071	**/
2072	gchar **
2073	g_regex_split_simple (const gchar *pattern,
2074	const gchar *string,
2075	GRegexCompileFlags compile_options,
2076	GRegexMatchFlags match_options)
2077	{
2078	GRegex *regex;
2079	gchar **result;
2080
2081	regex = g_regex_new (pattern, compile_options, match_options: `0`, NULL);
2082	if (!regex)
2083	return NULL;
2084
2085	result = g_regex_split_full (regex, string, string_len: -`1`, start_position: `0`, match_options, max_tokens: `0`, NULL);
2086	g_regex_unref (regex);
2087	return result;
2088	}
2089
2090	/**
2091	* g_regex_split:
2092	* @regex: a #GRegex structure
2093	* @string: the string to split with the pattern
2094	* @match_options: match time option flags
2095	*
2096	* Breaks the string on the pattern, and returns an array of the tokens.
2097	* If the pattern contains capturing parentheses, then the text for each
2098	* of the substrings will also be returned. If the pattern does not match
2099	* anywhere in the string, then the whole string is returned as the first
2100	* token.
2101	*
2102	* As a special case, the result of splitting the empty string "" is an
2103	* empty vector, not a vector containing a single string. The reason for
2104	* this special case is that being able to represent an empty vector is
2105	* typically more useful than consistent handling of empty elements. If
2106	* you do need to represent empty elements, you'll need to check for the
2107	* empty string before calling this function.
2108	*
2109	* A pattern that can match empty strings splits @string into separate
2110	* characters wherever it matches the empty string between characters.
2111	* For example splitting "ab c" using as a separator "\s*", you will get
2112	* "a", "b" and "c".
2113	*
2114	* Returns: (transfer full): a %NULL-terminated gchar ** array. Free
2115	* it using g_strfreev()
2116	*
2117	* Since: 2.14
2118	**/
2119	gchar **
2120	g_regex_split (const GRegex *regex,
2121	const gchar *string,
2122	GRegexMatchFlags match_options)
2123	{
2124	return g_regex_split_full (regex, string, string_len: -`1`, start_position: `0`,
2125	match_options, max_tokens: `0`, NULL);
2126	}
2127
2128	/**
2129	* g_regex_split_full:
2130	* @regex: a #GRegex structure
2131	* @string: (array length=string_len): the string to split with the pattern
2132	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2133	* @start_position: starting index of the string to match, in bytes
2134	* @match_options: match time option flags
2135	* @max_tokens: the maximum number of tokens to split @string into.
2136	* If this is less than 1, the string is split completely
2137	* @error: return location for a #GError
2138	*
2139	* Breaks the string on the pattern, and returns an array of the tokens.
2140	* If the pattern contains capturing parentheses, then the text for each
2141	* of the substrings will also be returned. If the pattern does not match
2142	* anywhere in the string, then the whole string is returned as the first
2143	* token.
2144	*
2145	* As a special case, the result of splitting the empty string "" is an
2146	* empty vector, not a vector containing a single string. The reason for
2147	* this special case is that being able to represent an empty vector is
2148	* typically more useful than consistent handling of empty elements. If
2149	* you do need to represent empty elements, you'll need to check for the
2150	* empty string before calling this function.
2151	*
2152	* A pattern that can match empty strings splits @string into separate
2153	* characters wherever it matches the empty string between characters.
2154	* For example splitting "ab c" using as a separator "\s*", you will get
2155	* "a", "b" and "c".
2156	*
2157	* Setting @start_position differs from just passing over a shortened
2158	* string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
2159	* that begins with any kind of lookbehind assertion, such as "\b".
2160	*
2161	* Returns: (transfer full): a %NULL-terminated gchar ** array. Free
2162	* it using g_strfreev()
2163	*
2164	* Since: 2.14
2165	**/
2166	gchar **
2167	g_regex_split_full (const GRegex *regex,
2168	const gchar *string,
2169	gssize string_len,
2170	gint start_position,
2171	GRegexMatchFlags match_options,
2172	gint max_tokens,
2173	GError **error)
2174	{
2175	GError *tmp_error = NULL;
2176	GMatchInfo *match_info;
2177	GList list, last;
2178	gint i;
2179	gint token_count;
2180	gboolean match_ok;
2181	/ position of the last separator. /
2182	gint last_separator_end;
2183	/ was the last match 0 bytes long? /
2184	gboolean last_match_is_empty;
2185	/ the returned array of char *s /*
2186	gchar **string_list;
2187
2188	g_return_val_if_fail (regex != NULL, NULL);
2189	g_return_val_if_fail (string != NULL, NULL);
2190	g_return_val_if_fail (start_position >= `0`, NULL);
2191	g_return_val_if_fail (error == NULL \|\| *error == NULL, NULL);
2192	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, NULL);
2193
2194	if (max_tokens <= `0`)
2195	max_tokens = G_MAXINT;
2196
2197	if (string_len < `0`)
2198	string_len = strlen (s: string);
2199
2200	/ zero-length string /
2201	if (string_len - start_position == `0`)
2202	return g_new0 (gchar *, `1`);
2203
2204	if (max_tokens == `1`)
2205	{
2206	string_list = g_new0 (gchar *, `2`);
2207	string_list[`0`] = g_strndup (str: &string[start_position],
2208	n: string_len - start_position);
2209	return string_list;
2210	}
2211
2212	list = NULL;
2213	token_count = `0`;
2214	last_separator_end = start_position;
2215	last_match_is_empty = FALSE;
2216
2217	match_ok = g_regex_match_full (regex, string, string_len, start_position,
2218	match_options, match_info: &match_info, error: &tmp_error);
2219
2220	while (tmp_error == NULL)
2221	{
2222	if (match_ok)
2223	{
2224	last_match_is_empty =
2225	(match_info->offsets[`0`] == match_info->offsets[`1`]);
2226
2227	/ we need to skip empty separators at the same position of the end*
2228	* of another separator. e.g. the string is "a b" and the separator
2229	* is " *", so from 1 to 2 we have a match and at position 2 we have
2230	* an empty match. */
2231	if (last_separator_end != match_info->offsets[`1`])
2232	{
2233	gchar *token;
2234	gint match_count;
2235
2236	token = g_strndup (str: string + last_separator_end,
2237	n: match_info->offsets[`0`] - last_separator_end);
2238	list = g_list_prepend (list, data: token);
2239	token_count++;
2240
2241	/ if there were substrings, these need to be added to*
2242	* the list. */
2243	match_count = g_match_info_get_match_count (match_info);
2244	if (match_count > `1`)
2245	{
2246	for (i = `1`; i < match_count; i++)
2247	list = g_list_prepend (list, data: g_match_info_fetch (match_info, match_num: i));
2248	}
2249	}
2250	}
2251	else
2252	{
2253	/ if there was no match, copy to end of string. /
2254	if (!last_match_is_empty)
2255	{
2256	gchar *token = g_strndup (str: string + last_separator_end,
2257	n: match_info->string_len - last_separator_end);
2258	list = g_list_prepend (list, data: token);
2259	}
2260	/ no more tokens, end the loop. /
2261	break;
2262	}
2263
2264	/ -1 to leave room for the last part. /
2265	if (token_count >= max_tokens - `1`)
2266	{
2267	/ we have reached the maximum number of tokens, so we copy*
2268	* the remaining part of the string. */
2269	if (last_match_is_empty)
2270	{
2271	/ the last match was empty, so we have moved one char*
2272	* after the real position to avoid empty matches at the
2273	* same position. */
2274	match_info->pos = PREV_CHAR (regex, &string[match_info->pos]) - string;
2275	}
2276	/ the if is needed in the case we have terminated the available*
2277	* tokens, but we are at the end of the string, so there are no
2278	* characters left to copy. */
2279	if (string_len > match_info->pos)
2280	{
2281	gchar *token = g_strndup (str: string + match_info->pos,
2282	n: string_len - match_info->pos);
2283	list = g_list_prepend (list, data: token);
2284	}
2285	/ end the loop. /
2286	break;
2287	}
2288
2289	last_separator_end = match_info->pos;
2290	if (last_match_is_empty)
2291	/ if the last match was empty, g_match_info_next() has moved*
2292	* forward to avoid infinite loops, but we still need to copy that
2293	* character. */
2294	last_separator_end = PREV_CHAR (regex, &string[last_separator_end]) - string;
2295
2296	match_ok = g_match_info_next (match_info, error: &tmp_error);
2297	}
2298	g_match_info_free (match_info);
2299	if (tmp_error != NULL)
2300	{
2301	g_propagate_error (dest: error, src: tmp_error);
2302	g_list_free_full (list, free_func: g_free);
2303	return NULL;
2304	}
2305
2306	string_list = g_new (gchar *, g_list_length (list) + `1`);
2307	i = `0`;
2308	for (last = g_list_last (list); last; last = g_list_previous (last))
2309	string_list[i++] = last->data;
2310	string_list[i] = NULL;
2311	g_list_free (list);
2312
2313	return string_list;
2314	}
2315
/* Kinds of item a replacement string is broken into; stored in
 * InterpolationData.type. */
enum
{
  REPL_TYPE_STRING = 0,
  REPL_TYPE_CHARACTER = 1,
  REPL_TYPE_SYMBOLIC_REFERENCE = 2,
  REPL_TYPE_NUMERIC_REFERENCE = 3,
  REPL_TYPE_CHANGE_CASE = 4
};
2324
/* Case-conversion state used while building a replacement. Each mode is
 * a distinct bit so the *_MASK values can test for groups of modes. */
typedef enum
{
  CHANGE_CASE_NONE         = 0x01,
  CHANGE_CASE_UPPER        = 0x02,
  CHANGE_CASE_LOWER        = 0x04,
  CHANGE_CASE_UPPER_SINGLE = 0x08,
  CHANGE_CASE_LOWER_SINGLE = 0x10,
  /* modes that apply to one character only */
  CHANGE_CASE_SINGLE_MASK  = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE,
  /* modes that lower-case */
  CHANGE_CASE_LOWER_MASK   = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE,
  /* modes that upper-case */
  CHANGE_CASE_UPPER_MASK   = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE
} ChangeCase;
2336
/* One parsed item of a replacement string. @type (a REPL_TYPE_* value)
 * tells which of the other fields the item uses. */
struct _InterpolationData
{
  /* literal text (REPL_TYPE_STRING) or group name
   * (REPL_TYPE_SYMBOLIC_REFERENCE); released by free_interpolation_data() */
  gchar *text;
  /* one of the REPL_TYPE_* values */
  gint type;
  /* group number for REPL_TYPE_NUMERIC_REFERENCE */
  gint num;
  /* the character for REPL_TYPE_CHARACTER */
  gchar c;
  /* the mode to switch to for REPL_TYPE_CHANGE_CASE */
  ChangeCase change_case;
};
2345
2346	static void
2347	free_interpolation_data (InterpolationData *data)
2348	{
2349	g_free (mem: data->text);
2350	g_free (mem: data);
2351	}
2352
2353	static const gchar *
2354	expand_escape (const gchar *replacement,
2355	const gchar *p,
2356	InterpolationData *data,
2357	GError **error)
2358	{
2359	const gchar q, r;
2360	gint x, d, h, i;
2361	const gchar *error_detail;
2362	gint base = `0`;
2363	GError *tmp_error = NULL;
2364
2365	p++;
2366	switch (*p)
2367	{
2368	case `'t'`:
2369	p++;
2370	data->c = `'\t'`;
2371	data->type = REPL_TYPE_CHARACTER;
2372	break;
2373	case `'n'`:
2374	p++;
2375	data->c = `'\n'`;
2376	data->type = REPL_TYPE_CHARACTER;
2377	break;
2378	case `'v'`:
2379	p++;
2380	data->c = `'\v'`;
2381	data->type = REPL_TYPE_CHARACTER;
2382	break;
2383	case `'r'`:
2384	p++;
2385	data->c = `'\r'`;
2386	data->type = REPL_TYPE_CHARACTER;
2387	break;
2388	case `'f'`:
2389	p++;
2390	data->c = `'\f'`;
2391	data->type = REPL_TYPE_CHARACTER;
2392	break;
2393	case `'a'`:
2394	p++;
2395	data->c = `'\a'`;
2396	data->type = REPL_TYPE_CHARACTER;
2397	break;
2398	case `'b'`:
2399	p++;
2400	data->c = `'\b'`;
2401	data->type = REPL_TYPE_CHARACTER;
2402	break;
2403	case `'\\'`:
2404	p++;
2405	data->c = `'\\'`;
2406	data->type = REPL_TYPE_CHARACTER;
2407	break;
2408	case `'x'`:
2409	p++;
2410	x = `0`;
2411	if (*p == `'{'`)
2412	{
2413	p++;
2414	do
2415	{
2416	h = g_ascii_xdigit_value (c: *p);
2417	if (h < `0`)
2418	{
2419	error_detail = _("hexadecimal digit or “}” expected");
2420	goto error;
2421	}
2422	x = x * `16` + h;
2423	p++;
2424	}
2425	while (*p != `'}'`);
2426	p++;
2427	}
2428	else
2429	{
2430	for (i = `0`; i < `2`; i++)
2431	{
2432	h = g_ascii_xdigit_value (c: *p);
2433	if (h < `0`)
2434	{
2435	error_detail = _("hexadecimal digit expected");
2436	goto error;
2437	}
2438	x = x * `16` + h;
2439	p++;
2440	}
2441	}
2442	data->type = REPL_TYPE_STRING;
2443	data->text = g_new0 (gchar, `8`);
2444	g_unichar_to_utf8 (c: x, outbuf: data->text);
2445	break;
2446	case `'l'`:
2447	p++;
2448	data->type = REPL_TYPE_CHANGE_CASE;
2449	data->change_case = CHANGE_CASE_LOWER_SINGLE;
2450	break;
2451	case `'u'`:
2452	p++;
2453	data->type = REPL_TYPE_CHANGE_CASE;
2454	data->change_case = CHANGE_CASE_UPPER_SINGLE;
2455	break;
2456	case `'L'`:
2457	p++;
2458	data->type = REPL_TYPE_CHANGE_CASE;
2459	data->change_case = CHANGE_CASE_LOWER;
2460	break;
2461	case `'U'`:
2462	p++;
2463	data->type = REPL_TYPE_CHANGE_CASE;
2464	data->change_case = CHANGE_CASE_UPPER;
2465	break;
2466	case `'E'`:
2467	p++;
2468	data->type = REPL_TYPE_CHANGE_CASE;
2469	data->change_case = CHANGE_CASE_NONE;
2470	break;
2471	case `'g'`:
2472	p++;
2473	if (*p != `'<'`)
2474	{
2475	error_detail = _("missing “<” in symbolic reference");
2476	goto error;
2477	}
2478	q = p + `1`;
2479	do
2480	{
2481	p++;
2482	if (!*p)
2483	{
2484	error_detail = _("unfinished symbolic reference");
2485	goto error;
2486	}
2487	}
2488	while (*p != `'>'`);
2489	if (p - q == `0`)
2490	{
2491	error_detail = _("zero-length symbolic reference");
2492	goto error;
2493	}
2494	if (g_ascii_isdigit (*q))
2495	{
2496	x = `0`;
2497	do
2498	{
2499	h = g_ascii_digit_value (c: *q);
2500	if (h < `0`)
2501	{
2502	error_detail = _("digit expected");
2503	p = q;
2504	goto error;
2505	}
2506	x = x * `10` + h;
2507	q++;
2508	}
2509	while (q != p);
2510	data->num = x;
2511	data->type = REPL_TYPE_NUMERIC_REFERENCE;
2512	}
2513	else
2514	{
2515	r = q;
2516	do
2517	{
2518	if (!g_ascii_isalnum (*r))
2519	{
2520	error_detail = _("illegal symbolic reference");
2521	p = r;
2522	goto error;
2523	}
2524	r++;
2525	}
2526	while (r != p);
2527	data->text = g_strndup (str: q, n: p - q);
2528	data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
2529	}
2530	p++;
2531	break;
2532	case `'0'`:
2533	/ if \0 is followed by a number is an octal number representing a*
2534	* character, else it is a numeric reference. */
2535	if (g_ascii_digit_value (c: *g_utf8_next_char (p)) >= `0`)
2536	{
2537	base = `8`;
2538	p = g_utf8_next_char (p);
2539	}
2540	G_GNUC_FALLTHROUGH;
2541	case `'1'`:
2542	case `'2'`:
2543	case `'3'`:
2544	case `'4'`:
2545	case `'5'`:
2546	case `'6'`:
2547	case `'7'`:
2548	case `'8'`:
2549	case `'9'`:
2550	x = `0`;
2551	d = `0`;
2552	for (i = `0`; i < `3`; i++)
2553	{
2554	h = g_ascii_digit_value (c: *p);
2555	if (h < `0`)
2556	break;
2557	if (h > `7`)
2558	{
2559	if (base == `8`)
2560	break;
2561	else
2562	base = `10`;
2563	}
2564	if (i == `2` && base == `10`)
2565	break;
2566	x = x * `8` + h;
2567	d = d * `10` + h;
2568	p++;
2569	}
2570	if (base == `8` \|\| i == `3`)
2571	{
2572	data->type = REPL_TYPE_STRING;
2573	data->text = g_new0 (gchar, `8`);
2574	g_unichar_to_utf8 (c: x, outbuf: data->text);
2575	}
2576	else
2577	{
2578	data->type = REPL_TYPE_NUMERIC_REFERENCE;
2579	data->num = d;
2580	}
2581	break;
2582	case `0`:
2583	error_detail = _("stray final “\\”");
2584	goto error;
2585	break;
2586	default:
2587	error_detail = _("unknown escape sequence");
2588	goto error;
2589	}
2590
2591	return p;
2592
2593	error:
2594	/ G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu /
2595	tmp_error = g_error_new (G_REGEX_ERROR,
2596	code: G_REGEX_ERROR_REPLACE,
2597	_("Error while parsing replacement "
2598	"text “%s” at char %lu: %s"),
2599	replacement,
2600	(gulong)(p - replacement),
2601	error_detail);
2602	g_propagate_error (dest: error, src: tmp_error);
2603
2604	return NULL;
2605	}
2606
2607	static GList *
2608	split_replacement (const gchar *replacement,
2609	GError **error)
2610	{
2611	GList *list = NULL;
2612	InterpolationData *data;
2613	const gchar p, start;
2614
2615	start = p = replacement;
2616	while (*p)
2617	{
2618	if (*p == `'\\'`)
2619	{
2620	data = g_new0 (InterpolationData, `1`);
2621	start = p = expand_escape (replacement, p, data, error);
2622	if (p == NULL)
2623	{
2624	g_list_free_full (list, free_func: (GDestroyNotify) free_interpolation_data);
2625	free_interpolation_data (data);
2626
2627	return NULL;
2628	}
2629	list = g_list_prepend (list, data);
2630	}
2631	else
2632	{
2633	p++;
2634	if (p == `'\\'` \|\| p == `'\0'`)
2635	{
2636	if (p - start > `0`)
2637	{
2638	data = g_new0 (InterpolationData, `1`);
2639	data->text = g_strndup (str: start, n: p - start);
2640	data->type = REPL_TYPE_STRING;
2641	list = g_list_prepend (list, data);
2642	}
2643	}
2644	}
2645	}
2646
2647	return g_list_reverse (list);
2648	}
2649
/* Convert c according to change_case: lower-case it when one of the
 * CHANGE_CASE_LOWER_MASK bits is set, upper-case it for any other value. */
#define CHANGE_CASE(c, change_case) \
        ((((change_case) & CHANGE_CASE_LOWER_MASK) == 0) ? \
                g_unichar_toupper (c) : \
                g_unichar_tolower (c))
2655
2656	static void
2657	string_append (GString *string,
2658	const gchar *text,
2659	ChangeCase *change_case)
2660	{
2661	gunichar c;
2662
2663	if (text[`0`] == `'\0'`)
2664	return;
2665
2666	if (*change_case == CHANGE_CASE_NONE)
2667	{
2668	g_string_append (string, val: text);
2669	}
2670	else if (*change_case & CHANGE_CASE_SINGLE_MASK)
2671	{
2672	c = g_utf8_get_char (p: text);
2673	g_string_append_unichar (string, CHANGE_CASE (c, *change_case));
2674	g_string_append (string, g_utf8_next_char (text));
2675	*change_case = CHANGE_CASE_NONE;
2676	}
2677	else
2678	{
2679	while (*text != `'\0'`)
2680	{
2681	c = g_utf8_get_char (p: text);
2682	g_string_append_unichar (string, CHANGE_CASE (c, *change_case));
2683	text = g_utf8_next_char (text);
2684	}
2685	}
2686	}
2687
2688	static gboolean
2689	interpolate_replacement (const GMatchInfo *match_info,
2690	GString *result,
2691	gpointer data)
2692	{
2693	GList *list;
2694	InterpolationData *idata;
2695	gchar *match;
2696	ChangeCase change_case = CHANGE_CASE_NONE;
2697
2698	for (list = data; list; list = list->next)
2699	{
2700	idata = list->data;
2701	switch (idata->type)
2702	{
2703	case REPL_TYPE_STRING:
2704	string_append (string: result, text: idata->text, change_case: &change_case);
2705	break;
2706	case REPL_TYPE_CHARACTER:
2707	g_string_append_c (result, CHANGE_CASE (idata->c, change_case));
2708	if (change_case & CHANGE_CASE_SINGLE_MASK)
2709	change_case = CHANGE_CASE_NONE;
2710	break;
2711	case REPL_TYPE_NUMERIC_REFERENCE:
2712	match = g_match_info_fetch (match_info, match_num: idata->num);
2713	if (match)
2714	{
2715	string_append (string: result, text: match, change_case: &change_case);
2716	g_free (mem: match);
2717	}
2718	break;
2719	case REPL_TYPE_SYMBOLIC_REFERENCE:
2720	match = g_match_info_fetch_named (match_info, name: idata->text);
2721	if (match)
2722	{
2723	string_append (string: result, text: match, change_case: &change_case);
2724	g_free (mem: match);
2725	}
2726	break;
2727	case REPL_TYPE_CHANGE_CASE:
2728	change_case = idata->change_case;
2729	break;
2730	}
2731	}
2732
2733	return FALSE;
2734	}
2735
2736	/ whether actual match_info is needed for replacement, i.e.*
2737	* whether there are references
2738	*/
2739	static gboolean
2740	interpolation_list_needs_match (GList *list)
2741	{
2742	while (list != NULL)
2743	{
2744	InterpolationData *data = list->data;
2745
2746	if (data->type == REPL_TYPE_SYMBOLIC_REFERENCE \|\|
2747	data->type == REPL_TYPE_NUMERIC_REFERENCE)
2748	{
2749	return TRUE;
2750	}
2751
2752	list = list->next;
2753	}
2754
2755	return FALSE;
2756	}
2757
2758	/**
2759	* g_regex_replace:
2760	* @regex: a #GRegex structure
2761	* @string: (array length=string_len): the string to perform matches against
2762	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2763	* @start_position: starting index of the string to match, in bytes
2764	* @replacement: text to replace each match with
2765	* @match_options: options for the match
2766	* @error: location to store the error occurring, or %NULL to ignore errors
2767	*
2768	* Replaces all occurrences of the pattern in @regex with the
2769	* replacement text. Backreferences of the form '\number' or
2770	* '\g<number>' in the replacement text are interpolated by the
2771	* number-th captured subexpression of the match, '\g<name>' refers
2772	* to the captured subexpression with the given name. '\0' refers
2773	* to the complete match, but '\0' followed by a number is the octal
2774	* representation of a character. To include a literal '\' in the
2775	* replacement, write '\\\\'.
2776	*
2777	* There are also escapes that changes the case of the following text:
2778	*
2779	* - \l: Convert to lower case the next character
2780	* - \u: Convert to upper case the next character
2781	* - \L: Convert to lower case till \E
2782	* - \U: Convert to upper case till \E
2783	* - \E: End case modification
2784	*
2785	* If you do not need to use backreferences use g_regex_replace_literal().
2786	*
2787	* The @replacement string must be UTF-8 encoded even if #G_REGEX_RAW was
2788	* passed to g_regex_new(). If you want to use not UTF-8 encoded strings
2789	* you can use g_regex_replace_literal().
2790	*
2791	* Setting @start_position differs from just passing over a shortened
2792	* string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern that
2793	* begins with any kind of lookbehind assertion, such as "\b".
2794	*
2795	* Returns: a newly allocated string containing the replacements
2796	*
2797	* Since: 2.14
2798	*/
2799	gchar *
2800	g_regex_replace (const GRegex *regex,
2801	const gchar *string,
2802	gssize string_len,
2803	gint start_position,
2804	const gchar *replacement,
2805	GRegexMatchFlags match_options,
2806	GError **error)
2807	{
2808	gchar *result;
2809	GList *list;
2810	GError *tmp_error = NULL;
2811
2812	g_return_val_if_fail (regex != NULL, NULL);
2813	g_return_val_if_fail (string != NULL, NULL);
2814	g_return_val_if_fail (start_position >= `0`, NULL);
2815	g_return_val_if_fail (replacement != NULL, NULL);
2816	g_return_val_if_fail (error == NULL \|\| *error == NULL, NULL);
2817	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, NULL);
2818
2819	list = split_replacement (replacement, error: &tmp_error);
2820	if (tmp_error != NULL)
2821	{
2822	g_propagate_error (dest: error, src: tmp_error);
2823	return NULL;
2824	}
2825
2826	result = g_regex_replace_eval (regex,
2827	string, string_len, start_position,
2828	match_options,
2829	eval: interpolate_replacement,
2830	user_data: (gpointer)list,
2831	error: &tmp_error);
2832	if (tmp_error != NULL)
2833	g_propagate_error (dest: error, src: tmp_error);
2834
2835	g_list_free_full (list, free_func: (GDestroyNotify) free_interpolation_data);
2836
2837	return result;
2838	}
2839
2840	static gboolean
2841	literal_replacement (const GMatchInfo *match_info,
2842	GString *result,
2843	gpointer data)
2844	{
2845	g_string_append (string: result, val: data);
2846	return FALSE;
2847	}
2848
2849	/**
2850	* g_regex_replace_literal:
2851	* @regex: a #GRegex structure
2852	* @string: (array length=string_len): the string to perform matches against
2853	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2854	* @start_position: starting index of the string to match, in bytes
2855	* @replacement: text to replace each match with
2856	* @match_options: options for the match
2857	* @error: location to store the error occurring, or %NULL to ignore errors
2858	*
2859	* Replaces all occurrences of the pattern in @regex with the
2860	* replacement text. @replacement is replaced literally, to
2861	* include backreferences use g_regex_replace().
2862	*
2863	* Setting @start_position differs from just passing over a
2864	* shortened string and setting #G_REGEX_MATCH_NOTBOL in the
2865	* case of a pattern that begins with any kind of lookbehind
2866	* assertion, such as "\b".
2867	*
2868	* Returns: a newly allocated string containing the replacements
2869	*
2870	* Since: 2.14
2871	*/
2872	gchar *
2873	g_regex_replace_literal (const GRegex *regex,
2874	const gchar *string,
2875	gssize string_len,
2876	gint start_position,
2877	const gchar *replacement,
2878	GRegexMatchFlags match_options,
2879	GError **error)
2880	{
2881	g_return_val_if_fail (replacement != NULL, NULL);
2882	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, NULL);
2883
2884	return g_regex_replace_eval (regex,
2885	string, string_len, start_position,
2886	match_options,
2887	eval: literal_replacement,
2888	user_data: (gpointer)replacement,
2889	error);
2890	}
2891
2892	/**
2893	* g_regex_replace_eval:
2894	* @regex: a #GRegex structure from g_regex_new()
2895	* @string: (array length=string_len): string to perform matches against
2896	* @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated
2897	* @start_position: starting index of the string to match, in bytes
2898	* @match_options: options for the match
2899	* @eval: a function to call for each match
2900	* @user_data: user data to pass to the function
2901	* @error: location to store the error occurring, or %NULL to ignore errors
2902	*
2903	* Replaces occurrences of the pattern in regex with the output of
2904	* @eval for that occurrence.
2905	*
2906	* Setting @start_position differs from just passing over a shortened
2907	* string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
2908	* that begins with any kind of lookbehind assertion, such as "\b".
2909	*
2910	* The following example uses g_regex_replace_eval() to replace multiple
2911	* strings at once:
2912	* \|[<!-- language="C" -->
2913	* static gboolean
2914	* eval_cb (const GMatchInfo *info,
2915	* GString *res,
2916	* gpointer data)
2917	* {
2918	* gchar *match;
2919	* gchar *r;
2920	*
2921	* match = g_match_info_fetch (info, 0);
2922	* r = g_hash_table_lookup ((GHashTable *)data, match);
2923	* g_string_append (res, r);
2924	* g_free (match);
2925	*
2926	* return FALSE;
2927	* }
2928	*
2929	* ...
2930	*
2931	* GRegex *reg;
2932	* GHashTable *h;
2933	* gchar *res;
2934	*
2935	* h = g_hash_table_new (g_str_hash, g_str_equal);
2936	*
2937	* g_hash_table_insert (h, "1", "ONE");
2938	* g_hash_table_insert (h, "2", "TWO");
2939	* g_hash_table_insert (h, "3", "THREE");
2940	* g_hash_table_insert (h, "4", "FOUR");
2941	*
2942	* reg = g_regex_new ("1\|2\|3\|4", 0, 0, NULL);
2943	* res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL);
2944	* g_hash_table_destroy (h);
2945	*
2946	* ...
2947	* ]\|
2948	*
2949	* Returns: a newly allocated string containing the replacements
2950	*
2951	* Since: 2.14
2952	*/
2953	gchar *
2954	g_regex_replace_eval (const GRegex *regex,
2955	const gchar *string,
2956	gssize string_len,
2957	gint start_position,
2958	GRegexMatchFlags match_options,
2959	GRegexEvalCallback eval,
2960	gpointer user_data,
2961	GError **error)
2962	{
2963	GMatchInfo *match_info;
2964	GString *result;
2965	gint str_pos = `0`;
2966	gboolean done = FALSE;
2967	GError *tmp_error = NULL;
2968
2969	g_return_val_if_fail (regex != NULL, NULL);
2970	g_return_val_if_fail (string != NULL, NULL);
2971	g_return_val_if_fail (start_position >= `0`, NULL);
2972	g_return_val_if_fail (eval != NULL, NULL);
2973	g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == `0`, NULL);
2974
2975	if (string_len < `0`)
2976	string_len = strlen (s: string);
2977
2978	result = g_string_sized_new (dfl_size: string_len);
2979
2980	/ run down the string making matches. /
2981	g_regex_match_full (regex, string, string_len, start_position,
2982	match_options, match_info: &match_info, error: &tmp_error);
2983	while (!done && g_match_info_matches (match_info))
2984	{
2985	g_string_append_len (string: result,
2986	val: string + str_pos,
2987	len: match_info->offsets[`0`] - str_pos);
2988	done = (*eval) (match_info, result, user_data);
2989	str_pos = match_info->offsets[`1`];
2990	g_match_info_next (match_info, error: &tmp_error);
2991	}
2992	g_match_info_free (match_info);
2993	if (tmp_error != NULL)
2994	{
2995	g_propagate_error (dest: error, src: tmp_error);
2996	g_string_free (string: result, TRUE);
2997	return NULL;
2998	}
2999
3000	g_string_append_len (string: result, val: string + str_pos, len: string_len - str_pos);
3001	return g_string_free (string: result, FALSE);
3002	}
3003
3004	/**
3005	* g_regex_check_replacement:
3006	* @replacement: the replacement string
3007	* @has_references: (out) (optional): location to store information about
3008	* references in @replacement or %NULL
3009	* @error: location to store error
3010	*
3011	* Checks whether @replacement is a valid replacement string
3012	* (see g_regex_replace()), i.e. that all escape sequences in
3013	* it are valid.
3014	*
3015	* If @has_references is not %NULL then @replacement is checked
3016	* for pattern references. For instance, replacement text 'foo\n'
3017	* does not contain references and may be evaluated without information
3018	* about actual match, but '\0\1' (whole match followed by first
3019	* subpattern) requires valid #GMatchInfo object.
3020	*
3021	* Returns: whether @replacement is a valid replacement string
3022	*
3023	* Since: 2.14
3024	*/
3025	gboolean
3026	g_regex_check_replacement (const gchar *replacement,
3027	gboolean *has_references,
3028	GError **error)
3029	{
3030	GList *list;
3031	GError *tmp = NULL;
3032
3033	list = split_replacement (replacement, error: &tmp);
3034
3035	if (tmp)
3036	{
3037	g_propagate_error (dest: error, src: tmp);
3038	return FALSE;
3039	}
3040
3041	if (has_references)
3042	*has_references = interpolation_list_needs_match (list);
3043
3044	g_list_free_full (list, free_func: (GDestroyNotify) free_interpolation_data);
3045
3046	return TRUE;
3047	}
3048
3049	/**
3050	* g_regex_escape_nul:
3051	* @string: the string to escape
3052	* @length: the length of @string
3053	*
3054	* Escapes the nul characters in @string to "\x00". It can be used
3055	* to compile a regex with embedded nul characters.
3056	*
3057	* For completeness, @length can be -1 for a nul-terminated string.
3058	* In this case the output string will be of course equal to @string.
3059	*
3060	* Returns: a newly-allocated escaped string
3061	*
3062	* Since: 2.30
3063	*/
3064	gchar *
3065	g_regex_escape_nul (const gchar *string,
3066	gint length)
3067	{
3068	GString *escaped;
3069	const gchar p, piece_start, *end;
3070	gint backslashes;
3071
3072	g_return_val_if_fail (string != NULL, NULL);
3073
3074	if (length < `0`)
3075	return g_strdup (str: string);
3076
3077	end = string + length;
3078	p = piece_start = string;
3079	escaped = g_string_sized_new (dfl_size: length + `1`);
3080
3081	backslashes = `0`;
3082	while (p < end)
3083	{
3084	switch (*p)
3085	{
3086	case `'\0'`:
3087	if (p != piece_start)
3088	{
3089	/ copy the previous piece. /
3090	g_string_append_len (string: escaped, val: piece_start, len: p - piece_start);
3091	}
3092	if ((backslashes & `1`) == `0`)
3093	g_string_append_c (escaped, `'\\'`);
3094	g_string_append_c (escaped, `'x'`);
3095	g_string_append_c (escaped, `'0'`);
3096	g_string_append_c (escaped, `'0'`);
3097	piece_start = ++p;
3098	backslashes = `0`;
3099	break;
3100	case `'\\'`:
3101	backslashes++;
3102	++p;
3103	break;
3104	default:
3105	backslashes = `0`;
3106	p = g_utf8_next_char (p);
3107	break;
3108	}
3109	}
3110
3111	if (piece_start < end)
3112	g_string_append_len (string: escaped, val: piece_start, len: end - piece_start);
3113
3114	return g_string_free (string: escaped, FALSE);
3115	}
3116
3117	/**
3118	* g_regex_escape_string:
3119	* @string: (array length=length): the string to escape
3120	* @length: the length of @string, in bytes, or -1 if @string is nul-terminated
3121	*
3122	* Escapes the special characters used for regular expressions
3123	* in @string, for instance "a.bc" becomes "a\.b\c". This
3124	* function is useful to dynamically generate regular expressions.
3125	*
3126	* @string can contain nul characters that are replaced with "\0",
3127	* in this case remember to specify the correct length of @string
3128	* in @length.
3129	*
3130	* Returns: a newly-allocated escaped string
3131	*
3132	* Since: 2.14
3133	*/
3134	gchar *
3135	g_regex_escape_string (const gchar *string,
3136	gint length)
3137	{
3138	GString *escaped;
3139	const char p, piece_start, *end;
3140
3141	g_return_val_if_fail (string != NULL, NULL);
3142
3143	if (length < `0`)
3144	length = strlen (s: string);
3145
3146	end = string + length;
3147	p = piece_start = string;
3148	escaped = g_string_sized_new (dfl_size: length + `1`);
3149
3150	while (p < end)
3151	{
3152	switch (*p)
3153	{
3154	case `'\0'`:
3155	case `'\\'`:
3156	case `'\|'`:
3157	case `'('`:
3158	case `')'`:
3159	case `'['`:
3160	case `']'`:
3161	case `'{'`:
3162	case `'}'`:
3163	case `'^'`:
3164	case `'$'`:
3165	case `'*'`:
3166	case `'+'`:
3167	case `'?'`:
3168	case `'.'`:
3169	if (p != piece_start)
3170	/ copy the previous piece. /
3171	g_string_append_len (string: escaped, val: piece_start, len: p - piece_start);
3172	g_string_append_c (escaped, `'\\'`);
3173	if (*p == `'\0'`)
3174	g_string_append_c (escaped, `'0'`);
3175	else
3176	g_string_append_c (escaped, *p);
3177	piece_start = ++p;
3178	break;
3179	default:
3180	p = g_utf8_next_char (p);
3181	break;
3182	}
3183	}
3184
3185	if (piece_start < end)
3186	g_string_append_len (string: escaped, val: piece_start, len: end - piece_start);
3187
3188	return g_string_free (string: escaped, FALSE);
3189	}
3190

source code of gtk/subprojects/glib/glib/gregex.c