regex.c source code [libiberty/regex.c]

/* Extended regular expression matching and search library,
   version 0.12.
   (Implements POSIX draft P1003.2/D11.2, except for some of the
   internationalization features.)

   Copyright (C) 1993-2026 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */
23
/* This file has been modified for usage in libiberty.  It includes "xregex.h"
   instead of <regex.h>.  The "xregex.h" header file renames all external
   routines with an "x" prefix so they do not collide with the native regex
   routines or with other components' regex routines.  */
/* AIX requires this to be the first thing in the file.  */
29	#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
30	#pragma alloca
31	#endif
32
33	#if __GNUC__ >= 12
34	# pragma GCC diagnostic ignored "-Wuse-after-free"
35	#endif
36
37	#undef _GNU_SOURCE
38	#define _GNU_SOURCE
39
40	#ifndef INSIDE_RECURSION
41	# ifdef HAVE_CONFIG_H
42	# include <config.h>
43	# endif
44	#endif
45
46	#include <ansidecl.h>
47
48	#ifndef INSIDE_RECURSION
49
50	# if defined STDC_HEADERS && !defined emacs
51	# include <stddef.h>
52	# define PTR_INT_TYPE ptrdiff_t
53	# else
54	/ We need this for `regex.h', and perhaps for the Emacs include files. /
55	# include <sys/types.h>
56	# define PTR_INT_TYPE long
57	# endif
58
59	# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
60
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
63	# if defined _LIBC \|\| WIDE_CHAR_SUPPORT
64	/ Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. /
65	# include <wchar.h>
66	# include <wctype.h>
67	# endif
68
69	# ifdef _LIBC
70	/ We have to keep the namespace clean. /
71	# define regfree(preg) __regfree (preg)
72	# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
73	# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
74	# define regerror(errcode, preg, errbuf, errbuf_size) \
75	__regerror(errcode, preg, errbuf, errbuf_size)
76	# define re_set_registers(bu, re, nu, st, en) \
77	__re_set_registers (bu, re, nu, st, en)
78	# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
79	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
80	# define re_match(bufp, string, size, pos, regs) \
81	__re_match (bufp, string, size, pos, regs)
82	# define re_search(bufp, string, size, startpos, range, regs) \
83	__re_search (bufp, string, size, startpos, range, regs)
84	# define re_compile_pattern(pattern, length, bufp) \
85	__re_compile_pattern (pattern, length, bufp)
86	# define re_set_syntax(syntax) __re_set_syntax (syntax)
87	# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
88	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
89	# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
90
91	# define btowc __btowc
92
93	/ We are also using some library internals. /
94	# include <locale/localeinfo.h>
95	# include <locale/elem-hash.h>
96	# include <langinfo.h>
97	# include <locale/coll-lookup.h>
98	# endif
99
100	/ This is for other GNU distributions with internationalized messages. /
101	# if (HAVE_LIBINTL_H && ENABLE_NLS) \|\| defined _LIBC
102	# include <libintl.h>
103	# ifdef _LIBC
104	# undef gettext
105	# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
106	# endif
107	# else
108	# define gettext(msgid) (msgid)
109	# endif
110
111	# ifndef gettext_noop
112	/ This define is so xgettext can find the internationalizable*
113	strings. /*
114	# define gettext_noop(String) String
115	# endif
116
117	/ The `emacs' switch turns on certain matching commands*
118	that make sense only in Emacs. /*
119	# ifdef emacs
120
121	# include "lisp.h"
122	# include "buffer.h"
123	# include "syntax.h"
124
125	# else /* not emacs */
126
127	/ If we are not linking with Emacs proper,*
128	we can't use the relocating allocator
129	even if config.h says that we can. /*
130	# undef REL_ALLOC
131
132	# if defined STDC_HEADERS \|\| defined _LIBC
133	# include <stdlib.h>
134	# else
135	char *malloc ();
136	char *realloc ();
137	# endif
138
139	/ When used in Emacs's lib-src, we need to get bzero and bcopy somehow.*
140	If nothing else has been done, use the method below. /*
141	# ifdef INHIBIT_STRING_HEADER
142	# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
143	# if !defined bzero && !defined bcopy
144	# undef INHIBIT_STRING_HEADER
145	# endif
146	# endif
147	# endif
148
149	/ This is the normal way of making sure we have a bcopy and a bzero.*
150	This is used in most programs--a few other programs avoid this
151	by defining INHIBIT_STRING_HEADER. /*
152	# ifndef INHIBIT_STRING_HEADER
153	# if defined HAVE_STRING_H \|\| defined STDC_HEADERS \|\| defined _LIBC
154	# include <string.h>
155	# ifndef bzero
156	# ifndef _LIBC
157	# define bzero(s, n) ((void) memset (s, '\0', n))
158	# else
159	# define bzero(s, n) __bzero (s, n)
160	# endif
161	# endif
162	# else
163	# include <strings.h>
164	# ifndef memcmp
165	# define memcmp(s1, s2, n) bcmp (s1, s2, n)
166	# endif
167	# ifndef memcpy
168	# define memcpy(d, s, n) (bcopy (s, d, n), (d))
169	# endif
170	# endif
171	# endif
172
173	/ Define the syntax stuff for \<, \>, etc. /
174
175	/ This must be nonzero for the wordchar and notwordchar pattern*
176	commands in re_match_2. /*
177	# ifndef Sword
178	# define Sword 1
179	# endif
180
181	# ifdef SWITCH_ENUM_BUG
182	# define SWITCH_ENUM_CAST(x) ((int)(x))
183	# else
184	# define SWITCH_ENUM_CAST(x) (x)
185	# endif
186
187	# endif /* not emacs */
188
189	# if defined _LIBC \|\| HAVE_LIMITS_H
190	# include <limits.h>
191	# endif
192
193	# ifndef MB_LEN_MAX
194	# define MB_LEN_MAX 1
195	# endif
196
197	/ Get the interface, including the syntax bits. /
198	# include "xregex.h" /* change for libiberty */
199
200	/ isalpha etc. are used for the character classes. /
201	# include <ctype.h>
202
203	/ Jim Meyering writes:*
204
205	"... Some ctype macros are valid only for character codes that
206	isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
207	using /bin/cc or gcc but without giving an ansi option). So, all
208	ctype uses should be through macros like ISPRINT... If
209	STDC_HEADERS is defined, then autoconf has verified that the ctype
210	macros don't need to be guarded with references to isascii. ...
211	Defining isascii to 1 should let any compiler worth its salt
212	eliminate the && through constant folding."
213	Solaris defines some of these symbols so we must undefine them first. /*
214
215	# undef ISASCII
216	# if defined STDC_HEADERS \|\| (!defined isascii && !defined HAVE_ISASCII)
217	# define ISASCII(c) 1
218	# else
219	# define ISASCII(c) isascii(c)
220	# endif
221
222	# ifdef isblank
223	# define ISBLANK(c) (ISASCII (c) && isblank (c))
224	# else
225	# define ISBLANK(c) ((c) == ' ' \|\| (c) == '\t')
226	# endif
227	# ifdef isgraph
228	# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
229	# else
230	# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
231	# endif
232
233	# undef ISPRINT
234	# define ISPRINT(c) (ISASCII (c) && isprint (c))
235	# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
236	# define ISALNUM(c) (ISASCII (c) && isalnum (c))
237	# define ISALPHA(c) (ISASCII (c) && isalpha (c))
238	# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
239	# define ISLOWER(c) (ISASCII (c) && islower (c))
240	# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
241	# define ISSPACE(c) (ISASCII (c) && isspace (c))
242	# define ISUPPER(c) (ISASCII (c) && isupper (c))
243	# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
244
245	# ifdef _tolower
246	# define TOLOWER(c) _tolower(c)
247	# else
248	# define TOLOWER(c) tolower(c)
249	# endif
250
251	# ifndef NULL
252	# define NULL (void *)0
253	# endif
254
255	/ We remove any previous definition of `SIGN_EXTEND_CHAR',*
256	since ours (we hope) works properly with all combinations of
257	machines, compilers, `char' and `unsigned char' argument types.
258	(Per Bothner suggested the basic approach.) /*
259	# undef SIGN_EXTEND_CHAR
260	# if __STDC__
261	# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
262	# else /* not __STDC__ */
263	/ As in Harbison and Steele. /
264	# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
265	# endif
266
267	# ifndef emacs
268	/ How many characters in the character set. /
269	# define CHAR_SET_SIZE 256
270
271	# ifdef SYNTAX_TABLE
272
273	extern char *re_syntax_table;
274
275	# else /* not SYNTAX_TABLE */
276
277	static char re_syntax_table[CHAR_SET_SIZE];
278
279	static void init_syntax_once (void);
280
281	static void
282	init_syntax_once (void)
283	{
284	register int c;
285	static int done = `0`;
286
287	if (done)
288	return;
289	bzero (re_syntax_table, sizeof re_syntax_table);
290
291	for (c = `0`; c < CHAR_SET_SIZE; ++c)
292	if (ISALNUM (c))
293	re_syntax_table[c] = Sword;
294
295	re_syntax_table[`'_'`] = Sword;
296
297	done = `1`;
298	}
299
300	# endif /* not SYNTAX_TABLE */
301
302	# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
303
304	# endif /* emacs */
305
306	/ Integer type for pointers. /
307	# if !defined _LIBC && !defined HAVE_UINTPTR_T
308	typedef unsigned long int uintptr_t;
309	# endif
310
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
   use `alloca' instead of `malloc'.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used in
   Emacs; also, malloc is slower and causes storage fragmentation.  On
   the other hand, malloc is more portable, and easier to debug.

   Because we sometimes use alloca, some routines have to be macros,
   not functions -- `alloca'-allocated space disappears at the end of the
   function it is called in.  */
320
321	# ifdef REGEX_MALLOC
322
323	# define REGEX_ALLOCATE malloc
324	# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
325	# define REGEX_FREE free
326
327	# else /* not REGEX_MALLOC */
328
329	/ Emacs already defines alloca, sometimes. /
330	# ifndef alloca
331
332	/ Make alloca work the best possible way. /
333	# ifdef __GNUC__
334	# define alloca __builtin_alloca
335	# else /* not __GNUC__ */
336	# if HAVE_ALLOCA_H
337	# include <alloca.h>
338	# endif /* HAVE_ALLOCA_H */
339	# endif /* not __GNUC__ */
340
341	# endif /* not alloca */
342
343	# define REGEX_ALLOCATE alloca
344
/* Assumes a `char *destination' variable.  */
346	# define REGEX_REALLOCATE(source, osize, nsize) \
347	(destination = (char *) alloca (nsize), \
348	memcpy (destination, source, osize))
349
350	/ No need to do anything to free, after alloca. /
351	# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
352
353	# endif /* not REGEX_MALLOC */
354
355	/ Define how to allocate the failure stack. /
356
357	# if defined REL_ALLOC && defined REGEX_MALLOC
358
359	# define REGEX_ALLOCATE_STACK(size) \
360	r_alloc (&failure_stack_ptr, (size))
361	# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
362	r_re_alloc (&failure_stack_ptr, (nsize))
363	# define REGEX_FREE_STACK(ptr) \
364	r_alloc_free (&failure_stack_ptr)
365
366	# else /* not using relocating allocator */
367
368	# ifdef REGEX_MALLOC
369
370	# define REGEX_ALLOCATE_STACK malloc
371	# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
372	# define REGEX_FREE_STACK free
373
374	# else /* not REGEX_MALLOC */
375
376	# define REGEX_ALLOCATE_STACK alloca
377
378	# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
379	REGEX_REALLOCATE (source, osize, nsize)
380	/ No need to explicitly free anything. /
381	# define REGEX_FREE_STACK(arg)
382
383	# endif /* not REGEX_MALLOC */
384	# endif /* not using relocating allocator */
385
386
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
390	# define FIRST_STRING_P(ptr) \
391	(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
392
/* (Re)Allocate N items of type T using malloc/realloc.  The result is
   NULL when the allocation fails; callers must check for that.  */
# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Grow ADDR if it is already allocated, otherwise allocate it afresh.
   Wrapped in do/while (0) so the expansion is one statement and its
   `else' can never pair with an `if' at the call site.  */
# define RETALLOC_IF(addr, n, t) \
  do \
    { \
      if (addr) RETALLOC ((addr), (n), t); else (addr) = TALLOC ((n), t); \
    } \
  while (0)
/* Same as TALLOC, but through REGEX_ALLOCATE (malloc or alloca).  */
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
399
400	# define BYTEWIDTH 8 /* In bits. */
401
402	# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
403
404	# undef MAX
405	# undef MIN
406	# define MAX(a, b) ((a) > (b) ? (a) : (b))
407	# define MIN(a, b) ((a) < (b) ? (a) : (b))
408
409	typedef char boolean;
410	# define false 0
411	# define true 1
412
413	static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
414	reg_syntax_t syntax,
415	struct re_pattern_buffer *bufp);
416
417	static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
418	const char string1, int* size1,
419	const char string2, int* size2,
420	int pos,
421	struct re_registers *regs,
422	int stop);
423	static int byte_re_search_2 (struct re_pattern_buffer *bufp,
424	const char string1, int* size1,
425	const char string2, int* size2,
426	int startpos, int range,
427	struct re_registers regs, int* stop);
428	static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
429
430	#ifdef MBS_SUPPORT
431	static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
432	reg_syntax_t syntax,
433	struct re_pattern_buffer *bufp);
434
435
436	static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
437	const char cstring1, int* csize1,
438	const char cstring2, int* csize2,
439	int pos,
440	struct re_registers *regs,
441	int stop,
442	wchar_t string1, int* size1,
443	wchar_t string2, int* size2,
444	int mbs_offset1, int* *mbs_offset2);
445	static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
446	const char string1, int* size1,
447	const char string2, int* size2,
448	int startpos, int range,
449	struct re_registers regs, int* stop);
450	static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
451	#endif
452
453	/ These are the command codes that appear in compiled regular*
454	expressions. Some opcodes are followed by argument bytes. A
455	command code can specify any interpretation whatsoever for its
456	arguments. Zero bytes may appear in the compiled regular expression. /*
457
/* Opcodes of the compiled pattern.  The enumerators are numbered
   consecutively from zero in the order written here, so their order
   must not change.  */
typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Same as exactn, but contains binary data.  */
  exactn_bin,
# endif

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to specified set.  First
     following byte is number of bitmap bytes.  Then come bytes
     for a bitmap saying which chars are in.  Bits in each byte
     are ordered low-bit-first.  A character is in the set if its
     bit is 1.  A character too large to have a bit in the map is
     automatically not in the set.  */
  /* ifdef MBS_SUPPORT, following element is length of character
     classes, length of collating symbols, length of equivalence
     classes, length of character ranges, and length of characters.
     Next, character class element, collating symbols elements,
     equivalence class elements, range elements, and character
     elements follow.
     See regex_compile function.  */
  charset,

  /* Same parameters as charset, but match any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in
     the range 0 to one less than the pattern buffer's re_nsub
     field.  Then followed by one byte with the number of groups
     inner to this one.  (This last has to be part of the
     start_memory only because we need it in the on_failure_jump
     of re_match_2.)  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register
     number, in the range 0 to one less than `re_nsub' in the
     pattern buffer, and one byte with the number of inner groups,
     just like `start_memory'.  (We need the number of inner
     groups here because we don't have any easy way of finding the
     corresponding start_memory when we're at a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one
     byte containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning
     of string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by two-byte relative address of place to resume at
     in case of failure.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away latest failure point and then jump to following
     two-byte relative address.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  pop_failure_jump,

  /* Change to pop_failure_jump if know won't have to backtrack to
     match; otherwise change to jump.  This is used to jump
     back to the beginning of a repeat.  If what follows this jump
     clearly won't match what the repeat does, such that we can be
     sure that there is no use backtracking out of repetitions
     already matched, then we change it to a pop_failure_jump.
     Followed by two-byte address.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  maybe_pop_jump,

  /* Jump to following two-byte address, and push a dummy failure
     point.  This failure point will be thrown away if an attempt
     is made to use it for a failure.  A `+' construct makes this
     before the first repeat.  Also used as an intermediary kind
     of jump when compiling an alternative.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  jump_n,

  /* Set the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two
     bytes of number.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any char that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound	/* Succeeds if not at a word boundary.  */

# ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

  /* Matches any character whose syntax is specified.  Followed by
     a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
610	#endif /* not INSIDE_RECURSION */
611
612
613	#ifdef BYTE
614	# define CHAR_T char
615	# define UCHAR_T unsigned char
616	# define COMPILED_BUFFER_VAR bufp->buffer
617	# define OFFSET_ADDRESS_SIZE 2
618	# define PREFIX(name) byte_##name
619	# define ARG_PREFIX(name) name
620	# define PUT_CHAR(c) putchar (c)
621	#else
622	# ifdef WCHAR
623	# define CHAR_T wchar_t
624	# define UCHAR_T wchar_t
625	# define COMPILED_BUFFER_VAR wc_buffer
626	# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
627	# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
628	# define PREFIX(name) wcs_##name
629	# define ARG_PREFIX(name) c##name
630	/ Should we use wide stream?? /
631	# define PUT_CHAR(c) printf ("%C", c);
632	# define TRUE 1
633	# define FALSE 0
634	# else
635	# ifdef MBS_SUPPORT
636	# define WCHAR
637	# define INSIDE_RECURSION
638	# include "regex.c"
639	# undef INSIDE_RECURSION
640	# endif
641	# define BYTE
642	# define INSIDE_RECURSION
643	# include "regex.c"
644	# undef INSIDE_RECURSION
645	# endif
646	#endif
647
648	#ifdef INSIDE_RECURSION
649	/ Common operations on the compiled pattern. /
650
651	/ Store NUMBER in two contiguous bytes starting at DESTINATION. /
652	/ ifdef MBS_SUPPORT, we store NUMBER in 1 element. /
653
654	# ifdef WCHAR
655	# define STORE_NUMBER(destination, number) \
656	do { \
657	*(destination) = (UCHAR_T)(number); \
658	} while (0)
659	# else /* BYTE */
660	# define STORE_NUMBER(destination, number) \
661	do { \
662	(destination)[0] = (number) & 0377; \
663	(destination)[1] = (number) >> 8; \
664	} while (0)
665	# endif /* WCHAR */
666
667	/ Same as STORE_NUMBER, except increment DESTINATION to*
668	the byte after where the number is stored. Therefore, DESTINATION
669	must be an lvalue. /*
670	/ ifdef MBS_SUPPORT, we store NUMBER in 1 element. /
671
672	# define STORE_NUMBER_AND_INCR(destination, number) \
673	do { \
674	STORE_NUMBER (destination, number); \
675	(destination) += OFFSET_ADDRESS_SIZE; \
676	} while (0)
677
678	/ Put into DESTINATION a number stored in two contiguous bytes starting*
679	at SOURCE. /*
680	/ ifdef MBS_SUPPORT, we store NUMBER in 1 element. /
681
682	# ifdef WCHAR
683	# define EXTRACT_NUMBER(destination, source) \
684	do { \
685	(destination) = *(source); \
686	} while (0)
687	# else /* BYTE */
688	# define EXTRACT_NUMBER(destination, source) \
689	do { \
690	(destination) = *(source) & 0377; \
691	(destination) += ((unsigned) SIGN_EXTEND_CHAR (*((source) + 1))) << 8; \
692	} while (0)
693	# endif
694
695	# ifdef DEBUG
696	static void PREFIX(extract_number) (int dest, UCHAR_T source);
697	static void
698	PREFIX(extract_number) (int dest, UCHAR_T source)
699	{
700	# ifdef WCHAR
701	dest = source;
702	# else /* BYTE */
703	int temp = SIGN_EXTEND_CHAR (*(source + `1`));
704	dest = source & `0377`;
705	*dest += temp << `8`;
706	# endif
707	}
708
709	# ifndef EXTRACT_MACROS /* To debug the macros. */
710	# undef EXTRACT_NUMBER
711	# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
712	# endif /* not EXTRACT_MACROS */
713
714	# endif /* DEBUG */
715
716	/ Same as EXTRACT_NUMBER, except increment SOURCE to after the number.*
717	SOURCE must be an lvalue. /*
718
719	# define EXTRACT_NUMBER_AND_INCR(destination, source) \
720	do { \
721	EXTRACT_NUMBER (destination, source); \
722	(source) += OFFSET_ADDRESS_SIZE; \
723	} while (0)
724
725	# ifdef DEBUG
726	static void PREFIX(extract_number_and_incr) (int *destination,
727	UCHAR_T **source);
728	static void
729	PREFIX(extract_number_and_incr) (int destination, UCHAR_T *source)
730	{
731	PREFIX(extract_number) (destination, *source);
732	*source += OFFSET_ADDRESS_SIZE;
733	}
734
735	# ifndef EXTRACT_MACROS
736	# undef EXTRACT_NUMBER_AND_INCR
737	# define EXTRACT_NUMBER_AND_INCR(dest, src) \
738	PREFIX(extract_number_and_incr) (&dest, &src)
739	# endif /* not EXTRACT_MACROS */
740
741	# endif /* DEBUG */
742
743
744
745	/ If DEBUG is defined, Regex prints many voluminous messages about what*
746	it is doing (if the variable `debug' is nonzero). If linked with the
747	main program in `iregex.c', you can enter patterns and strings
748	interactively. And if linked with the main program in `main.c' and
749	the other test files, you can run the already-written tests. /*
750
751	# ifdef DEBUG
752
753	# ifndef DEFINED_ONCE
754
755	/ We use standard I/O for debugging. /
756	# include <stdio.h>
757
758	/ It is useful to test things that ``must'' be true when debugging. /
759	# include <assert.h>
760
761	static int debug;
762
763	# define DEBUG_STATEMENT(e) e
764	# define DEBUG_PRINT1(x) if (debug) printf (x)
765	# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
766	# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
767	# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
768	# endif /* not DEFINED_ONCE */
769
770	# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
771	if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
772	# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
773	if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
774
775
776	/ Print the fastmap in human-readable form. /
777
778	# ifndef DEFINED_ONCE
779	void
780	print_fastmap (char *fastmap)
781	{
782	unsigned was_a_range = `0`;
783	unsigned i = `0`;
784
785	while (i < (`1` << BYTEWIDTH))
786	{
787	if (fastmap[i++])
788	{
789	was_a_range = `0`;
790	putchar (i - `1`);
791	while (i < (`1` << BYTEWIDTH) && fastmap[i])
792	{
793	was_a_range = `1`;
794	i++;
795	}
796	if (was_a_range)
797	{
798	printf ("-");
799	putchar (i - `1`);
800	}
801	}
802	}
803	putchar (`'\n'`);
804	}
805	# endif /* not DEFINED_ONCE */
806
807
808	/ Print a compiled pattern string in human-readable form, starting at*
809	the START pointer into it and ending just before the pointer END. /*
810
811	void
812	PREFIX(print_partial_compiled_pattern) (UCHAR_T start, UCHAR_T end)
813	{
814	int mcnt, mcnt2;
815	UCHAR_T *p1;
816	UCHAR_T *p = start;
817	UCHAR_T *pend = end;
818
819	if (start == NULL)
820	{
821	printf ("(null)\n");
822	return;
823	}
824
825	/ Loop over pattern commands. /
826	while (p < pend)
827	{
828	# ifdef _LIBC
829	printf ("%td:\t", p - start);
830	# else
831	printf ("%ld:\t", (long int) (p - start));
832	# endif
833
834	switch ((re_opcode_t) *p++)
835	{
836	case no_op:
837	printf ("/no_op");
838	break;
839
840	case exactn:
841	mcnt = *p++;
842	printf ("/exactn/%d", mcnt);
843	do
844	{
845	putchar (`'/'`);
846	PUT_CHAR (*p++);
847	}
848	while (--mcnt);
849	break;
850
851	# ifdef MBS_SUPPORT
852	case exactn_bin:
853	mcnt = *p++;
854	printf ("/exactn_bin/%d", mcnt);
855	do
856	{
857	printf("/%lx", (long int) *p++);
858	}
859	while (--mcnt);
860	break;
861	# endif /* MBS_SUPPORT */
862
863	case start_memory:
864	mcnt = *p++;
865	printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
866	break;
867
868	case stop_memory:
869	mcnt = *p++;
870	printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
871	break;
872
873	case duplicate:
874	printf ("/duplicate/%ld", (long int) *p++);
875	break;
876
877	case anychar:
878	printf ("/anychar");
879	break;
880
881	case charset:
882	case charset_not:
883	{
884	# ifdef WCHAR
885	int i, length;
886	wchar_t *workp = p;
887	printf ("/charset [%s",
888	(re_opcode_t) *(workp - `1`) == charset_not ? "^" : "");
889	p += `5`;
890	length = workp++; /* the length of char_classes /
891	for (i=`0` ; i<length ; i++)
892	printf("[:%lx:]", (long int) *p++);
893	length = workp++; /* the length of collating_symbol /
894	for (i=`0` ; i<length ;)
895	{
896	printf("[.");
897	while(*p != `0`)
898	PUT_CHAR((i++,*p++));
899	i++,p++;
900	printf(".]");
901	}
902	length = workp++; /* the length of equivalence_class /
903	for (i=`0` ; i<length ;)
904	{
905	printf("[=");
906	while(*p != `0`)
907	PUT_CHAR((i++,*p++));
908	i++,p++;
909	printf("=]");
910	}
911	length = workp++; /* the length of char_range /
912	for (i=`0` ; i<length ; i++)
913	{
914	wchar_t range_start = *p++;
915	wchar_t range_end = *p++;
916	printf("%C-%C", range_start, range_end);
917	}
918	length = workp++; /* the length of char /
919	for (i=`0` ; i<length ; i++)
920	printf("%C", *p++);
921	putchar (`']'`);
922	# else
923	register int c, last = -`100`;
924	register int in_range = `0`;
925
926	printf ("/charset [%s",
927	(re_opcode_t) *(p - `1`) == charset_not ? "^" : "");
928
929	assert (p + *p < pend);
930
931	for (c = `0`; c < `256`; c++)
932	if (c / `8` < *p
933	&& (p[`1` + (c/`8`)] & (`1` << (c % `8`))))
934	{
935	/ Are we starting a range? /
936	if (last + `1` == c && ! in_range)
937	{
938	putchar (`'-'`);
939	in_range = `1`;
940	}
941	/ Have we broken a range? /
942	else if (last + `1` != c && in_range)
943	{
944	putchar (last);
945	in_range = `0`;
946	}
947
948	if (! in_range)
949	putchar (c);
950
951	last = c;
952	}
953
954	if (in_range)
955	putchar (last);
956
957	putchar (`']'`);
958
959	p += `1` + *p;
960	# endif /* WCHAR */
961	}
962	break;
963
964	case begline:
965	printf ("/begline");
966	break;
967
968	case endline:
969	printf ("/endline");
970	break;
971
972	case on_failure_jump:
973	PREFIX(extract_number_and_incr) (&mcnt, &p);
974	# ifdef _LIBC
975	printf ("/on_failure_jump to %td", p + mcnt - start);
976	# else
977	printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
978	# endif
979	break;
980
981	case on_failure_keep_string_jump:
982	PREFIX(extract_number_and_incr) (&mcnt, &p);
983	# ifdef _LIBC
984	printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
985	# else
986	printf ("/on_failure_keep_string_jump to %ld",
987	(long int) (p + mcnt - start));
988	# endif
989	break;
990
991	case dummy_failure_jump:
992	PREFIX(extract_number_and_incr) (&mcnt, &p);
993	# ifdef _LIBC
994	printf ("/dummy_failure_jump to %td", p + mcnt - start);
995	# else
996	printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
997	# endif
998	break;
999
1000	case push_dummy_failure:
1001	printf ("/push_dummy_failure");
1002	break;
1003
1004	case maybe_pop_jump:
1005	PREFIX(extract_number_and_incr) (&mcnt, &p);
1006	# ifdef _LIBC
1007	printf ("/maybe_pop_jump to %td", p + mcnt - start);
1008	# else
1009	printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1010	# endif
1011	break;
1012
1013	case pop_failure_jump:
1014	PREFIX(extract_number_and_incr) (&mcnt, &p);
1015	# ifdef _LIBC
1016	printf ("/pop_failure_jump to %td", p + mcnt - start);
1017	# else
1018	printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1019	# endif
1020	break;
1021
1022	case jump_past_alt:
1023	PREFIX(extract_number_and_incr) (&mcnt, &p);
1024	# ifdef _LIBC
1025	printf ("/jump_past_alt to %td", p + mcnt - start);
1026	# else
1027	printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1028	# endif
1029	break;
1030
1031	case jump:
1032	PREFIX(extract_number_and_incr) (&mcnt, &p);
1033	# ifdef _LIBC
1034	printf ("/jump to %td", p + mcnt - start);
1035	# else
1036	printf ("/jump to %ld", (long int) (p + mcnt - start));
1037	# endif
1038	break;
1039
1040	case succeed_n:
1041	PREFIX(extract_number_and_incr) (&mcnt, &p);
1042	p1 = p + mcnt;
1043	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1044	# ifdef _LIBC
1045	printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1046	# else
1047	printf ("/succeed_n to %ld, %d times",
1048	(long int) (p1 - start), mcnt2);
1049	# endif
1050	break;
1051
1052	case jump_n:
1053	PREFIX(extract_number_and_incr) (&mcnt, &p);
1054	p1 = p + mcnt;
1055	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1056	printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1057	break;
1058
1059	case set_number_at:
1060	PREFIX(extract_number_and_incr) (&mcnt, &p);
1061	p1 = p + mcnt;
1062	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063	# ifdef _LIBC
1064	printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1065	# else
1066	printf ("/set_number_at location %ld to %d",
1067	(long int) (p1 - start), mcnt2);
1068	# endif
1069	break;
1070
1071	case wordbound:
1072	printf ("/wordbound");
1073	break;
1074
1075	case notwordbound:
1076	printf ("/notwordbound");
1077	break;
1078
1079	case wordbeg:
1080	printf ("/wordbeg");
1081	break;
1082
1083	case wordend:
1084	printf ("/wordend");
1085	break;
1086
1087	# ifdef emacs
1088	case before_dot:
1089	printf ("/before_dot");
1090	break;
1091
1092	case at_dot:
1093	printf ("/at_dot");
1094	break;
1095
1096	case after_dot:
1097	printf ("/after_dot");
1098	break;
1099
1100	case syntaxspec:
1101	printf ("/syntaxspec");
1102	mcnt = *p++;
1103	printf ("/%d", mcnt);
1104	break;
1105
1106	case notsyntaxspec:
1107	printf ("/notsyntaxspec");
1108	mcnt = *p++;
1109	printf ("/%d", mcnt);
1110	break;
1111	# endif /* emacs */
1112
1113	case wordchar:
1114	printf ("/wordchar");
1115	break;
1116
1117	case notwordchar:
1118	printf ("/notwordchar");
1119	break;
1120
1121	case begbuf:
1122	printf ("/begbuf");
1123	break;
1124
1125	case endbuf:
1126	printf ("/endbuf");
1127	break;
1128
1129	default:
1130	printf ("?%ld", (long int) *(p-`1`));
1131	}
1132
1133	putchar (`'\n'`);
1134	}
1135
1136	# ifdef _LIBC
1137	printf ("%td:\tend of pattern.\n", p - start);
1138	# else
1139	printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1140	# endif
1141	}
1142
1143
1144	void
1145	PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
1146	{
1147	UCHAR_T buffer = (UCHAR_T) bufp->buffer;
1148
1149	PREFIX(print_partial_compiled_pattern) (buffer, buffer
1150	+ bufp->used / sizeof(UCHAR_T));
1151	printf ("%ld bytes used/%ld bytes allocated.\n",
1152	bufp->used, bufp->allocated);
1153
1154	if (bufp->fastmap_accurate && bufp->fastmap)
1155	{
1156	printf ("fastmap: ");
1157	print_fastmap (bufp->fastmap);
1158	}
1159
1160	# ifdef _LIBC
1161	printf ("re_nsub: %Zd\t", bufp->re_nsub);
1162	# else
1163	printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1164	# endif
1165	printf ("regs_alloc: %d\t", bufp->regs_allocated);
1166	printf ("can_be_null: %d\t", bufp->can_be_null);
1167	printf ("newline_anchor: %d\n", bufp->newline_anchor);
1168	printf ("no_sub: %d\t", bufp->no_sub);
1169	printf ("not_bol: %d\t", bufp->not_bol);
1170	printf ("not_eol: %d\t", bufp->not_eol);
1171	printf ("syntax: %lx\n", bufp->syntax);
1172	/ Perhaps we should print the translate table? /
1173	}
1174
1175
1176	void
1177	PREFIX(print_double_string) (const CHAR_T where, const* CHAR_T *string1,
1178	int size1, const CHAR_T string2, int* size2)
1179	{
1180	int this_char;
1181
1182	if (where == NULL)
1183	printf ("(null)");
1184	else
1185	{
1186	int cnt;
1187
1188	if (FIRST_STRING_P (where))
1189	{
1190	for (this_char = where - string1; this_char < size1; this_char++)
1191	PUT_CHAR (string1[this_char]);
1192
1193	where = string2;
1194	}
1195
1196	cnt = `0`;
1197	for (this_char = where - string2; this_char < size2; this_char++)
1198	{
1199	PUT_CHAR (string2[this_char]);
1200	if (++cnt > `100`)
1201	{
1202	fputs ("...", stdout);
1203	break;
1204	}
1205	}
1206	}
1207	}
1208
1209	# ifndef DEFINED_ONCE
/* Write the single character C to the standard error stream.  */
void
printchar (int c)
{
  fputc (c, stderr);
}
1215	# endif
1216
1217	# else /* not DEBUG */
1218
1219	# ifndef DEFINED_ONCE
1220	# undef assert
1221	# define assert(e)
1222
1223	# define DEBUG_STATEMENT(e)
1224	# define DEBUG_PRINT1(x)
1225	# define DEBUG_PRINT2(x1, x2)
1226	# define DEBUG_PRINT3(x1, x2, x3)
1227	# define DEBUG_PRINT4(x1, x2, x3, x4)
1228	# endif /* not DEFINED_ONCE */
1229	# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1230	# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1231
1232	# endif /* not DEBUG */
1233
1234
1235
1236	# ifdef WCHAR
1237	/ This convert a multibyte string to a wide character string.*
1238	And write their correspondances to offset_buffer(see below)
1239	and write whether each wchar_t is binary data to is_binary.
1240	This assume invalid multibyte sequences as binary data.
1241	We assume offset_buffer and is_binary is already allocated
1242	enough space. /*
1243
1244	static size_t convert_mbs_to_wcs (CHAR_T dest, const* unsigned char* src,
1245	size_t len, int *offset_buffer,
1246	char *is_binary);
1247	static size_t
1248	convert_mbs_to_wcs (CHAR_T dest, const* unsigned char*src, size_t len,
1249	int offset_buffer, char* *is_binary)
1250	/ It hold correspondances between src(char string) and*
1251	dest(wchar_t string) for optimization.
1252	e.g. src = "xxxyzz"
1253	dest = {'X', 'Y', 'Z'}
1254	(each "xxx", "y" and "zz" represent one multibyte character
1255	corresponding to 'X', 'Y' and 'Z'.)
1256	offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1257	= {0, 3, 4, 6}
1258	*/
1259	{
1260	wchar_t *pdest = dest;
1261	const unsigned char *psrc = src;
1262	size_t wc_count = `0`;
1263
1264	mbstate_t mbs;
1265	int i, consumed;
1266	size_t mb_remain = len;
1267	size_t mb_count = `0`;
1268
1269	/ Initialize the conversion state. /
1270	memset (&mbs, `0`, sizeof (mbstate_t));
1271
1272	offset_buffer[`0`] = `0`;
1273	for( ; mb_remain > `0` ; ++wc_count, ++pdest, mb_remain -= consumed,
1274	psrc += consumed)
1275	{
1276	#ifdef _LIBC
1277	consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
1278	#else
1279	consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1280	#endif
1281
1282	if (consumed <= `0`)
1283	/ failed to convert. maybe src contains binary data.*
1284	So we consume 1 byte manualy. /*
1285	{
1286	pdest = psrc;
1287	consumed = `1`;
1288	is_binary[wc_count] = TRUE;
1289	}
1290	else
1291	is_binary[wc_count] = FALSE;
1292	/ In sjis encoding, we use yen sign as escape character in*
1293	place of reverse solidus. So we convert 0x5c(yen sign in
1294	sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1295	solidus in UCS2). /*
1296	if (consumed == `1` && (int) psrc == `0x5c` && (int) pdest == `0xa5`)
1297	pdest = (wchar_t) psrc;
1298
1299	offset_buffer[wc_count + `1`] = mb_count += consumed;
1300	}
1301
1302	/ Fill remain of the buffer with sentinel. /
1303	for (i = wc_count + `1` ; i <= len ; i++)
1304	offset_buffer[i] = mb_count + `1`;
1305
1306	return wc_count;
1307	}
1308
1309	# endif /* WCHAR */
1310
1311	#else /* not INSIDE_RECURSION */
1312
1313	/ Set by `re_set_syntax' to the current regexp syntax to recognize. Can*
1314	also be assigned to arbitrarily: each pattern buffer stores its own
1315	syntax, so it can be changed between regex compilations. /*
1316	/ This has no initializer because initialized variables in Emacs*
1317	become read-only after dumping. /*
1318	reg_syntax_t re_syntax_options;
1319
1320
1321	/ Specify the precise syntax of regexps for compilation. This provides*
1322	for compatibility for various utilities which historically have
1323	different, incompatible syntaxes.
1324
1325	The argument SYNTAX is a bit mask comprised of the various bits
1326	defined in regex.h. We return the old syntax. /*
1327
1328	reg_syntax_t
1329	re_set_syntax (reg_syntax_t syntax)
1330	{
1331	reg_syntax_t ret = re_syntax_options;
1332
1333	re_syntax_options = syntax;
1334	# ifdef DEBUG
1335	if (syntax & RE_DEBUG)
1336	debug = `1`;
1337	else if (debug) / was on but now is not /
1338	debug = `0`;
1339	# endif /* DEBUG */
1340	return ret;
1341	}
1342	# ifdef _LIBC
1343	weak_alias (__re_set_syntax, re_set_syntax)
1344	# endif
1345
1346	/ This table gives an error message for each of the error codes listed*
1347	in regex.h. Obviously the order here has to be same as there.
1348	POSIX doesn't require that we do anything for REG_NOERROR,
1349	but why not be nice? /*
1350
1351	static const char *re_error_msgid[] =
1352	{
1353	gettext_noop ("Success"), / REG_NOERROR /
1354	gettext_noop ("No match"), / REG_NOMATCH /
1355	gettext_noop ("Invalid regular expression"), / REG_BADPAT /
1356	gettext_noop ("Invalid collation character"), / REG_ECOLLATE /
1357	gettext_noop ("Invalid character class name"), / REG_ECTYPE /
1358	gettext_noop ("Trailing backslash"), / REG_EESCAPE /
1359	gettext_noop ("Invalid back reference"), / REG_ESUBREG /
1360	gettext_noop ("Unmatched [ or [^"), / REG_EBRACK /
1361	gettext_noop ("Unmatched ( or \\("), / REG_EPAREN /
1362	gettext_noop ("Unmatched \\{"), / REG_EBRACE /
1363	gettext_noop ("Invalid content of \\{\\}"), / REG_BADBR /
1364	gettext_noop ("Invalid range end"), / REG_ERANGE /
1365	gettext_noop ("Memory exhausted"), / REG_ESPACE /
1366	gettext_noop ("Invalid preceding regular expression"), / REG_BADRPT /
1367	gettext_noop ("Premature end of regular expression"), / REG_EEND /
1368	gettext_noop ("Regular expression too big"), / REG_ESIZE /
1369	gettext_noop ("Unmatched ) or \\)") / REG_ERPAREN /
1370	};
1371
1372	#endif /* INSIDE_RECURSION */
1373
#ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   searching and matching functions should not call alloca.  On some
   systems, alloca is implemented in terms of malloc, and if we're
   using the relocating allocator routines, then malloc could cause a
   relocation, which might (if the strings being searched are in the
   ralloc heap) shift the data out from underneath the regexp
   routines.

   Here's another reason to avoid allocation: Emacs
   processes input from X in a signal handler; processing X input may
   call malloc; if input arrives while a matching routine is calling
   malloc, then we're scrod.  But Emacs can't just block input while
   calling matching routines; then we don't notice interrupts when
   they come in.  So, Emacs blocks input around all regexp calls
   except the matching calls, which it leaves unprotected, in the
   faith that they will not malloc.  */

/* Normally, this is fine.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
# undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
# undef MATCH_MAY_ALLOCATE
# endif
#endif /* not DEFINED_ONCE */
1412
1413	#ifdef INSIDE_RECURSION
/* Failure stack declarations and macros; both re_compile_fastmap and
   re_match_2 use a failure stack.  These have to be macros because of
   REGEX_ALLOCATE_STACK.  */


/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  May be overridden by defining
   INIT_FAILURE_ALLOC before this point.  */
# ifndef INIT_FAILURE_ALLOC
# define INIT_FAILURE_ALLOC 5
# endif
1425
1426	/ Roughly the maximum number of failure points on the stack. Would be*
1427	exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1428	This is a variable only so users of regex can assign to it; we never
1429	change it ourselves. /*
1430
1431	# ifdef INT_IS_16BIT
1432
1433	# ifndef DEFINED_ONCE
1434	# if defined MATCH_MAY_ALLOCATE
1435	/ 4400 was enough to cause a crash on Alpha OSF/1,*
1436	whose default stack limit is 2mb. /*
1437	long int re_max_failures = `4000`;
1438	# else
1439	long int re_max_failures = `2000`;
1440	# endif
1441	# endif
1442
1443	union PREFIX(fail_stack_elt)
1444	{
1445	UCHAR_T *pointer;
1446	long int integer;
1447	};
1448
1449	typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1450
1451	typedef struct
1452	{
1453	PREFIX(fail_stack_elt_t) *stack;
1454	unsigned long int size;
1455	unsigned long int avail; / Offset of next open position. /
1456	} PREFIX(fail_stack_type);
1457
1458	# else /* not INT_IS_16BIT */
1459
1460	# ifndef DEFINED_ONCE
1461	# if defined MATCH_MAY_ALLOCATE
1462	/ 4400 was enough to cause a crash on Alpha OSF/1,*
1463	whose default stack limit is 2mb. /*
1464	int re_max_failures = `4000`;
1465	# else
1466	int re_max_failures = `2000`;
1467	# endif
1468	# endif
1469
1470	union PREFIX(fail_stack_elt)
1471	{
1472	UCHAR_T *pointer;
1473	int integer;
1474	};
1475
1476	typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1477
1478	typedef struct
1479	{
1480	PREFIX(fail_stack_elt_t) *stack;
1481	unsigned size;
1482	unsigned avail; / Offset of next open position. /
1483	} PREFIX(fail_stack_type);
1484
1485	# endif /* INT_IS_16BIT */
1486
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and FAIL_STACK_FULL
   assume a variable named `fail_stack'; FAIL_STACK_PTR_EMPTY assumes a
   pointer named `fail_stack_ptr'.  */
# ifndef DEFINED_ONCE
# define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
# endif


/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   When MATCH_MAY_ALLOCATE is not defined, no storage is obtained here:
   INIT_FAIL_STACK only resets the fill level and RESET_FAIL_STACK does
   nothing.  */

# ifdef MATCH_MAY_ALLOCATE
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
    \
    if (fail_stack.stack == NULL) \
      return -2; \
    \
    fail_stack.size = INIT_FAILURE_ALLOC; \
    fail_stack.avail = 0; \
  } while (0)

# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
# else
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.avail = 0; \
  } while (0)

# define RESET_FAIL_STACK()
# endif
1519
1520
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   Note that the stack pointer is overwritten with the reallocation
   result before it is tested, and the size is doubled only after the
   reallocation succeeded.  */

# define DOUBLE_FAIL_STACK(fail_stack) \
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
   ? 0 \
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
        REGEX_REALLOCATE_STACK ((fail_stack).stack, \
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
      \
      (fail_stack).stack == NULL \
      ? 0 \
      : ((fail_stack).size <<= 1, \
         1)))
1540
1541
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
  ((FAIL_STACK_FULL () \
    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
   ? 0 \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
      1))

/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No room check is made here; the caller must have ensured space.  */
# define PUSH_FAILURE_POINTER(item) \
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item) \
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item) \
  fail_stack.stack[fail_stack.avail++] = (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  */
# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1575
/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG, DEBUG_PUSH pushes an integer and DEBUG_POP stores the
   popped integer through ITEM_ADDR; otherwise both expand to nothing.  */
# ifdef DEBUG
# define DEBUG_PUSH PUSH_FAILURE_INT
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
# define DEBUG_PUSH(item)
# define DEBUG_POP(item_addr)
# endif
1584
1585
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared.

   Does `return FAILURE_CODE' if runs out of memory.

   Items are pushed in this order: for each register from
   lowest_active_reg to highest_active_reg its start, end and info
   word; then lowest_active_reg, highest_active_reg, PATTERN_PLACE,
   STRING_PLACE and, when debugging, the failure id.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    char *destination; \
    /* Must be int, so when we don't save any registers, the arithmetic \
       of 0 + -1 isn't done as unsigned.  */ \
    /* Can't be int, since there is not a shred of a guarantee that int \
       is wide enough to hold a value of something to which pointer can \
       be assigned */ \
    active_reg_t this_reg; \
    \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
    \
    DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
    \
    /* Ensure we have enough space allocated for what we will push.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
        \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      } \
    \
    /* Push the info, starting with the registers.  */ \
    DEBUG_PRINT1 ("\n"); \
    \
    if (1) \
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++) \
        { \
          DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
          DEBUG_STATEMENT (num_regs_pushed++); \
          \
          DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
          PUSH_FAILURE_POINTER (regstart[this_reg]); \
          \
          DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
          PUSH_FAILURE_POINTER (regend[this_reg]); \
          \
          DEBUG_PRINT2 (" info: %p\n ", \
                        reg_info[this_reg].word.pointer); \
          DEBUG_PRINT2 (" match_null=%d", \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
          DEBUG_PRINT2 (" matched_something=%d", \
                        MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT2 (" ever_matched=%d", \
                        EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT1 ("\n"); \
          PUSH_FAILURE_ELT (reg_info[this_reg].word); \
        } \
    \
    DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
    \
    DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
    \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
1673
# ifndef DEFINED_ONCE
/* This is the number of items that are pushed and popped on the stack
   for each register.  */
# define NUM_REG_ITEMS 3

/* Individual items aside from the registers.  */
# ifdef DEBUG
# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
# else
# define NUM_NONREG_ITEMS 4
# endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  Assumes the variables
   `highest_active_reg' and `lowest_active_reg'.  */
# define NUM_FAILURE_ITEMS \
  (((0 \
     ? 0 : highest_active_reg - lowest_active_reg + 1) \
    * NUM_REG_ITEMS) \
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1702
1703
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.

   The expansion is a brace-enclosed block, not a `do ... while (0)'.
   It also clears `set_regs_matched_done'.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
  DEBUG_STATEMENT (unsigned failure_id;) \
  active_reg_t this_reg; \
  const UCHAR_T *string_temp; \
  \
  assert (!FAIL_STACK_EMPTY ()); \
  \
  /* Remove failure points and point to how many regs pushed.  */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
  \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
  \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
  \
  /* If the saved string location is NULL, it came from an \
     on_failure_keep_string_jump opcode, and we want to throw away the \
     saved NULL, thus retaining our current position in the string.  */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const CHAR_T *) string_temp; \
  \
  DEBUG_PRINT2 (" Popping string %p: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
  \
  pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
  \
  /* Restore register info.  */ \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
  \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
  \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
        \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: %p\n", \
                      reg_info[this_reg].word.pointer); \
        \
        regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
        \
        regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
      } \
  else \
    { \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
  \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
} /* POP_FAILURE_POINT */
1784
1785	/ Structure for per-register (a.k.a. per-group) information.*
1786	Other register information, such as the
1787	starting and ending positions (which are addresses), and the list of
1788	inner groups (which is a bits list) are maintained in separate
1789	variables.
1790
1791	We are making a (strictly speaking) nonportable assumption here: that
1792	the compiler will pack our bit fields into something that fits into
1793	the type of `word', i.e., is something that fits into one item on the
1794	failure stack. /*
1795
1796
1797	/ Declarations and macros for re_match_2. /
1798
1799	typedef union
1800	{
1801	PREFIX(fail_stack_elt_t) word;
1802	struct
1803	{
1804	/ This field is one if this group can match the empty string,*
1805	zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. /*
1806	# define MATCH_NULL_UNSET_VALUE 3
1807	unsigned match_null_string_p : `2`;
1808	unsigned is_active : `1`;
1809	unsigned matched_something : `1`;
1810	unsigned ever_matched_something : `1`;
1811	} bits;
1812	} PREFIX(register_info_type);
1813
# ifndef DEFINED_ONCE
/* Accessors for the bit fields of a register_info_type value R.  */
# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
# define IS_ACTIVE(R) ((R).bits.is_active)
# define MATCHED_SOMETHING(R) ((R).bits.matched_something)
# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)


/* Call this when have matched a real character; it sets `matched' flags
   for the subexpressions which we are currently inside.  Also records
   that those subexprs have matched.

   Assumes the variables `set_regs_matched_done', `lowest_active_reg',
   `highest_active_reg' and `reg_info'.  The work is skipped when
   `set_regs_matched_done' is already nonzero.  */
# define SET_REGS_MATCHED() \
  do \
    { \
      if (!set_regs_matched_done) \
        { \
          active_reg_t r; \
          set_regs_matched_done = 1; \
          for (r = lowest_active_reg; r <= highest_active_reg; r++) \
            { \
              MATCHED_SOMETHING (reg_info[r]) \
                = EVER_MATCHED_SOMETHING (reg_info[r]) \
                = 1; \
            } \
        } \
    } \
  while (0)
# endif /* not DEFINED_ONCE */
1841
1842	/ Registers are set to a sentinel when they haven't yet matched. /
1843	static CHAR_T PREFIX(reg_unset_dummy);
1844	# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1845	# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1846
1847	/ Subroutine declarations and macros for regex_compile. /
1848	static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T loc, int* arg);
1849	static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
1850	int arg1, int arg2);
1851	static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
1852	int arg, UCHAR_T *end);
1853	static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
1854	int arg1, int arg2, UCHAR_T *end);
1855	static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
1856	const CHAR_T *p,
1857	reg_syntax_t syntax);
1858	static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
1859	const CHAR_T *pend,
1860	reg_syntax_t syntax);
1861	# ifdef WCHAR
1862	static reg_errcode_t wcs_compile_range (CHAR_T range_start,
1863	const CHAR_T **p_ptr,
1864	const CHAR_T *pend,
1865	char *translate,
1866	reg_syntax_t syntax,
1867	UCHAR_T *b,
1868	CHAR_T *char_set);
1869	static void insert_space (int num, CHAR_T loc, CHAR_T end);
1870	# else /* BYTE */
1871	static reg_errcode_t byte_compile_range (unsigned int range_start,
1872	const char **p_ptr,
1873	const char *pend,
1874	char *translate,
1875	reg_syntax_t syntax,
1876	unsigned char *b);
1877	# endif /* WCHAR */
1878
/* Fetch the next character in the uncompiled pattern---translating it
   if necessary.  Also cast from a signed character in the constant
   string passed to us by the user to an unsigned char that we can use
   as an array index (in, e.g., `translate').

   Assumes the variables `p', `pend' and `translate', and does
   `return REG_EEND' when the pattern is exhausted.  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.  */
# ifndef PATFETCH
# ifdef WCHAR
# define PATFETCH(c) \
  do {if (p == pend) return REG_EEND; \
    c = (UCHAR_T) *p++; \
    if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
  } while (0)
# else /* BYTE */
# define PATFETCH(c) \
  do {if (p == pend) return REG_EEND; \
    c = (unsigned char) *p++; \
    if (translate) c = (unsigned char) translate[c]; \
  } while (0)
# endif /* WCHAR */
# endif

/* Fetch the next character in the uncompiled pattern, with no
   translation.  */
# define PATFETCH_RAW(c) \
  do {if (p == pend) return REG_EEND; \
    c = (UCHAR_T) *p++; \
  } while (0)

/* Go backwards one character in the pattern.  */
# define PATUNFETCH p--
1911
1912
/* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
   `char *', to avoid warnings when a string constant is passed.  But
   when we use a character as a subscript we must make it unsigned.

   Assumes the variable `translate'.  May be overridden by defining
   TRANSLATE before this point.  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.  */

# ifndef TRANSLATE
# ifdef WCHAR
# define TRANSLATE(d) \
  ((translate && ((UCHAR_T) (d)) <= 0xff) \
   ? (char) translate[(unsigned char) (d)] : (d))
# else /* BYTE */
# define TRANSLATE(d) \
  (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
# endif /* WCHAR */
# endif
1931
1932
/* Macros for outputting the compiled pattern into `buffer'.
   They assume the variables `b' (next free position) and `bufp'.  */

/* If the buffer isn't allocated when it comes in, use this.  */
# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))

/* Make sure we have at least N more bytes of space in buffer.
   EXTEND_BUFFER is invoked repeatedly until the request fits.  */
# ifdef WCHAR
# define GET_BUFFER_SPACE(n) \
    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
            + (n)*sizeof(CHAR_T)) > bufp->allocated) \
      EXTEND_BUFFER ()
# else /* BYTE */
# define GET_BUFFER_SPACE(n) \
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
      EXTEND_BUFFER ()
# endif /* WCHAR */

/* Make sure we have one more byte of buffer space and then add C to it.  */
# define BUF_PUSH(c) \
  do { \
    GET_BUFFER_SPACE (1); \
    *b++ = (UCHAR_T) (c); \
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
# define BUF_PUSH_2(c1, c2) \
  do { \
    GET_BUFFER_SPACE (2); \
    *b++ = (UCHAR_T) (c1); \
    *b++ = (UCHAR_T) (c2); \
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.  */
# define BUF_PUSH_3(c1, c2, c3) \
  do { \
    GET_BUFFER_SPACE (3); \
    *b++ = (UCHAR_T) (c1); \
    *b++ = (UCHAR_T) (c2); \
    *b++ = (UCHAR_T) (c3); \
  } while (0)
1975
/* Store a jump with opcode OP at LOC to location TO.  We store a
   relative address offset by the three bytes the jump itself occupies.
   (The amount subtracted is written as 1 + OFFSET_ADDRESS_SIZE.)  */
# define STORE_JUMP(op, loc, to) \
  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))

/* Likewise, for a two-argument jump.  */
# define STORE_JUMP2(op, loc, to, arg) \
  PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP(op, loc, to) \
  PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP2(op, loc, to, arg) \
  PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
                      arg, b)
1993
/* This is not an arbitrary limit: the arguments which represent offsets
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
   be too small, many things would have to change.  */
/* Any other compiler which, like MSC, has allocation limit below 2^16
   bytes will have to use approach similar to what was done below for
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   reallocating to 0 bytes.  Such thing is not going to work too well.
   You have been warned!!  */
# ifndef DEFINED_ONCE
# if defined _MSC_VER && !defined WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   The REALLOC define eliminates a flurry of conversion warnings,
   but is not required.  */
# define MAX_BUF_SIZE 65500L
# define REALLOC(p,s) realloc ((p), (size_t) (s))
# else
# define MAX_BUF_SIZE (1L << 16)
# define REALLOC(p,s) realloc ((p), (s))
# endif

/* Extend the buffer by twice its current size via realloc and
   reset the pointers that pointed into the old block to point to the
   correct places in the new one.  If extending the buffer results in it
   being larger than MAX_BUF_SIZE, then flag memory exhausted.

   The helpers below serve that operation: MOVE_BUFFER_POINTER shifts a
   pointer by `incr', and ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an
   `else' branch that is only non-empty with bounded pointers.  */
# if __BOUNDED_POINTERS__
# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
# define MOVE_BUFFER_POINTER(P) \
  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
  else \
    { \
      SET_HIGH_BOUND (b); \
      SET_HIGH_BOUND (begalt); \
      if (fixup_alt_jump) \
        SET_HIGH_BOUND (fixup_alt_jump); \
      if (laststart) \
        SET_HIGH_BOUND (laststart); \
      if (pending_exact) \
        SET_HIGH_BOUND (pending_exact); \
    }
# else
# define MOVE_BUFFER_POINTER(P) (P) += incr
# define ELSE_EXTEND_BUFFER_HIGH_BOUND
# endif
# endif /* not DEFINED_ONCE */
2039
/* EXTEND_BUFFER doubles the compiled-pattern buffer, capping it at
   MAX_BUF_SIZE, and then relocates every pointer that refers into the
   buffer (`b', `begalt', and, when non-null, `fixup_alt_jump',
   `laststart' and `pending_exact').

   It expands to code that returns from the enclosing function:
   REG_ESIZE when the buffer cannot grow any further, REG_ESPACE when
   realloc fails.  The new block is obtained through a temporary and
   `bufp' is only updated once the allocation succeeded, so on failure
   `bufp->buffer' still owns the old block and `bufp->allocated' still
   describes it.

   NOTE(review): these plain `return's do not go through
   FREE_STACK_RETURN, so whatever that macro releases is presumably not
   released on these paths -- confirm against the callers.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    UCHAR_T *new_buffer = old_buffer;					\
    unsigned long new_allocated;					\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    /* How many characters the new buffer can have?  */			\
    wchar_count = new_allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Grow through the temporary so a failure keeps the old block.  */	\
    RETALLOC (new_buffer, wchar_count, UCHAR_T);			\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    COMPILED_BUFFER_VAR = new_buffer;					\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    /* Truncate the buffer to CHAR_T align.  */				\
    bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    UCHAR_T *new_buffer;						\
    unsigned long new_allocated;					\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    /* Grow through the temporary so a failure keeps the old block.  */	\
    new_buffer = (UCHAR_T *) REALLOC (old_buffer, new_allocated);	\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
2103
# ifndef DEFINED_ONCE
/* Since we have one byte reserved for the register number argument to
   {start,stop}_memory, the maximum number of groups we can report
   things about is what fits in that byte.  */
#  define MAX_REGNUM 255

/* But patterns can have more than `MAX_REGNUM' registers.  We just
   ignore the excess.  */
typedef unsigned regnum_t;


/* Macros for the compile stack.  */

/* Since offsets can go either forwards or backwards, this type needs to
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
/* int may be not enough when sizeof(int) == 2.  */
typedef long pattern_offset_t;

/* One compile-stack entry: buffer positions stored as offsets, plus a
   register number.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself: an array of SIZE entries, of which the
   first AVAIL are in use.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} compile_stack_type;


#  define INIT_COMPILE_STACK_SIZE 32

/* These expect a variable named `compile_stack' to be in scope.  */
#  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
#  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)

/* The next available element.  */
#  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

# endif /* not DEFINED_ONCE */
2149
/* Set the bit for character C in a list.  The bitmap is reached through
   `b', which must be in scope where the macro is expanded.  C is
   parenthesized at both uses so that an expression argument is
   converted to unsigned char as a whole.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* DEFINED_ONCE */
2156
/* Get the next unsigned number in the uncompiled pattern.

   Characters are fetched with PATFETCH into `c' while `p' has not
   reached `pend'.  The first non-digit stops the scan; it has already
   been consumed and is left in `c'.  A negative NUM is reset to zero
   when the first digit arrives, so a NUM that is still negative
   afterwards means that no digit was read.  Once NUM exceeds
   RE_DUP_MAX further digits are consumed but no longer accumulated.

   The expansion is a brace block, not a do/while statement.  */
# define GET_UNSIGNED_NUMBER(num)					\
  {									\
    while (p != pend)							\
      {									\
	PATFETCH (c);							\
	if (c < '0' || c > '9')						\
	  break;							\
	if ((num) <= RE_DUP_MAX)					\
	  {								\
	    if ((num) < 0)						\
	      (num) = 0;						\
	    (num) = (num) * 10 + c - '0';				\
	  }								\
      }									\
  }
2173
# ifndef DEFINED_ONCE
#  if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifdef CHARCLASS_NAME_MAX
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   endif

/* With wide-character support a class name is looked up with wctype.  */
#   ifdef _LIBC
#    define IS_CHAR_CLASS(string) __wctype (string)
#   else
#    define IS_CHAR_CLASS(string) wctype (string)
#   endif
#  else
#   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

/* Without it, only this fixed list of class names is recognized.  */
#   define IS_CHAR_CLASS(string)					\
   (STREQ (string, "alpha") || STREQ (string, "upper")			\
    || STREQ (string, "lower") || STREQ (string, "digit")		\
    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
    || STREQ (string, "space") || STREQ (string, "print")		\
    || STREQ (string, "punct") || STREQ (string, "graph")		\
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
#  endif
# endif /* DEFINED_ONCE */
2203
2204	# ifndef MATCH_MAY_ALLOCATE
2205
2206	/ If we cannot allocate large objects within re_match_2_internal,*
2207	we make the fail stack and register vectors global.
2208	The fail stack, we grow to the maximum size when a regexp
2209	is compiled.
2210	The register vectors, we adjust in size each time we
2211	compile a regexp, according to the number of registers it needs. /*
2212
2213	static PREFIX(fail_stack_type) fail_stack;
2214
2215	/ Size with which the following vectors are currently allocated.*
2216	That is so we can make them bigger as needed,
2217	but never make them smaller. /*
2218	# ifdef DEFINED_ONCE
2219	static int regs_allocated_size;
2220
2221	static const char regstart, regend;
2222	static const char old_regstart, old_regend;
2223	static const char best_regstart, best_regend;
2224	static const char **reg_dummy;
2225	# endif /* DEFINED_ONCE */
2226
2227	static PREFIX(register_info_type) *PREFIX(reg_info);
2228	static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2229
2230	/ Make the register vectors big enough for NUM_REGS registers,*
2231	but don't make them smaller. /*
2232
2233	static void
2234	PREFIX(regex_grow_registers) (int num_regs)
2235	{
2236	if (num_regs > regs_allocated_size)
2237	{
2238	RETALLOC_IF (regstart, num_regs, const char *);
2239	RETALLOC_IF (regend, num_regs, const char *);
2240	RETALLOC_IF (old_regstart, num_regs, const char *);
2241	RETALLOC_IF (old_regend, num_regs, const char *);
2242	RETALLOC_IF (best_regstart, num_regs, const char *);
2243	RETALLOC_IF (best_regend, num_regs, const char *);
2244	RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2245	RETALLOC_IF (reg_dummy, num_regs, const char *);
2246	RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2247
2248	regs_allocated_size = num_regs;
2249	}
2250	}
2251
2252	# endif /* not MATCH_MAY_ALLOCATE */
2253
2254	# ifndef DEFINED_ONCE
2255	static boolean group_in_compile_stack (compile_stack_type compile_stack,
2256	regnum_t regnum);
2257	# endif /* not DEFINED_ONCE */
2258
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   Returns one of error codes defined in `regex.h', or zero for success.

   Assumes the `allocated' (and perhaps `buffer') and `translate'
   fields are set in BUFP on entry.

   If it succeeds, results are put in BUFP (if it returns an error, the
   contents of BUFP are undefined):
     `buffer' is the compiled pattern;
     `syntax' is set to SYNTAX;
     `used' is set to the length of the compiled pattern;
     `fastmap_accurate' is zero;
     `re_nsub' is the number of subexpressions in PATTERN;
     `not_bol' and `not_eol' are zero;

   The `fastmap' and `newline_anchor' fields are neither
   examined nor set.  */
2276
/* Return VALUE from the enclosing function, freeing storage we
   allocated: the compile stack and, in the wide-character build, also
   `pattern', `mbs_offset' and `is_binary'.  All of these names must be
   in scope where the macro is expanded.  */
# ifdef WCHAR
#  define FREE_STACK_RETURN(value)		\
  return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
# else
#  define FREE_STACK_RETURN(value)		\
  return (free (compile_stack.stack), value)
# endif /* WCHAR */
2285
2286	static reg_errcode_t
2287	PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
2288	size_t ARG_PREFIX(size), reg_syntax_t syntax,
2289	struct re_pattern_buffer *bufp)
2290	{
2291	/ We fetch characters from PATTERN here. Even though PATTERN is*
2292	`char ' (i.e., signed), we declare these variables as unsigned, so*
2293	they can be reliably used as array indices. /*
2294	register UCHAR_T c, c1;
2295
2296	#ifdef WCHAR
2297	/ A temporary space to keep wchar_t pattern and compiled pattern. /
2298	CHAR_T pattern, COMPILED_BUFFER_VAR;
2299	size_t size;
2300	/ offset buffer for optimization. See convert_mbs_to_wc. /
2301	int *mbs_offset = NULL;
2302	/ It hold whether each wchar_t is binary data or not. /
2303	char *is_binary = NULL;
2304	/ A flag whether exactn is handling binary data or not. /
2305	char is_exactn_bin = FALSE;
2306	#endif /* WCHAR */
2307
2308	/ A random temporary spot in PATTERN. /
2309	const CHAR_T *p1;
2310
2311	/ Points to the end of the buffer, where we should append. /
2312	register UCHAR_T *b;
2313
2314	/ Keeps track of unclosed groups. /
2315	compile_stack_type compile_stack;
2316
2317	/ Points to the current (ending) position in the pattern. /
2318	#ifdef WCHAR
2319	const CHAR_T *p;
2320	const CHAR_T *pend;
2321	#else /* BYTE */
2322	const CHAR_T *p = pattern;
2323	const CHAR_T *pend = pattern + size;
2324	#endif /* WCHAR */
2325
2326	/ How to translate the characters in the pattern. /
2327	RE_TRANSLATE_TYPE translate = bufp->translate;
2328
2329	/ Address of the count-byte of the most recently inserted `exactn'*
2330	command. This makes it possible to tell if a new exact-match
2331	character can be added to that command or if the character requires
2332	a new `exactn' command. /*
2333	UCHAR_T *pending_exact = `0`;
2334
2335	/ Address of start of the most recently finished expression.*
2336	This tells, e.g., postfix where to find the start of its*
2337	operand. Reset at the beginning of groups and alternatives. /*
2338	UCHAR_T *laststart = `0`;
2339
2340	/ Address of beginning of regexp, or inside of last group. /
2341	UCHAR_T *begalt;
2342
2343	/ Address of the place where a forward jump should go to the end of*
2344	the containing expression. Each alternative of an `or' -- except the
2345	last -- ends with a forward jump of this sort. /*
2346	UCHAR_T *fixup_alt_jump = `0`;
2347
2348	/ Counts open-groups as they are encountered. Remembered for the*
2349	matching close-group on the compile stack, so the same register
2350	number is put in the stop_memory as the start_memory. /*
2351	regnum_t regnum = `0`;
2352
2353	#ifdef WCHAR
2354	/ Initialize the wchar_t PATTERN and offset_buffer. /
2355	p = pend = pattern = TALLOC(csize + `1`, CHAR_T);
2356	mbs_offset = TALLOC(csize + `1`, int);
2357	is_binary = TALLOC(csize + `1`, char);
2358	if (pattern == NULL \|\| mbs_offset == NULL \|\| is_binary == NULL)
2359	{
2360	free(pattern);
2361	free(mbs_offset);
2362	free(is_binary);
2363	return REG_ESPACE;
2364	}
2365	pattern[csize] = L`'\0'`; / sentinel /
2366	size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2367	pend = p + size;
2368	if (size < `0`)
2369	{
2370	free(pattern);
2371	free(mbs_offset);
2372	free(is_binary);
2373	return REG_BADPAT;
2374	}
2375	#endif
2376
2377	#ifdef DEBUG
2378	DEBUG_PRINT1 ("\nCompiling pattern: ");
2379	if (debug)
2380	{
2381	unsigned debug_count;
2382
2383	for (debug_count = `0`; debug_count < size; debug_count++)
2384	PUT_CHAR (pattern[debug_count]);
2385	putchar (`'\n'`);
2386	}
2387	#endif /* DEBUG */
2388
2389	/ Initialize the compile stack. /
2390	compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2391	if (compile_stack.stack == NULL)
2392	{
2393	#ifdef WCHAR
2394	free(pattern);
2395	free(mbs_offset);
2396	free(is_binary);
2397	#endif
2398	return REG_ESPACE;
2399	}
2400
2401	compile_stack.size = INIT_COMPILE_STACK_SIZE;
2402	compile_stack.avail = `0`;
2403
2404	/ Initialize the pattern buffer. /
2405	bufp->syntax = syntax;
2406	bufp->fastmap_accurate = `0`;
2407	bufp->not_bol = bufp->not_eol = `0`;
2408
2409	/ Set `used' to zero, so that if we return an error, the pattern*
2410	printer (for debugging) will think there's no pattern. We reset it
2411	at the end. /*
2412	bufp->used = `0`;
2413
2414	/ Always count groups, whether or not bufp->no_sub is set. /
2415	bufp->re_nsub = `0`;
2416
2417	#if !defined emacs && !defined SYNTAX_TABLE
2418	/ Initialize the syntax table. /
2419	init_syntax_once ();
2420	#endif
2421
2422	if (bufp->allocated == `0`)
2423	{
2424	if (bufp->buffer)
2425	{ / If zero allocated, but buffer is non-null, try to realloc*
2426	enough space. This loses if buffer's address is bogus, but
2427	that is the user's responsibility. /*
2428	#ifdef WCHAR
2429	/ Free bufp->buffer and allocate an array for wchar_t pattern*
2430	buffer. /*
2431	free(bufp->buffer);
2432	COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2433	UCHAR_T);
2434	#else
2435	RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2436	#endif /* WCHAR */
2437	}
2438	else
2439	{ / Caller did not allocate a buffer. Do it for them. /
2440	COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2441	UCHAR_T);
2442	}
2443
2444	if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2445	#ifdef WCHAR
2446	bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2447	#endif /* WCHAR */
2448	bufp->allocated = INIT_BUF_SIZE;
2449	}
2450	#ifdef WCHAR
2451	else
2452	COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2453	#endif
2454
2455	begalt = b = COMPILED_BUFFER_VAR;
2456
2457	/ Loop through the uncompiled pattern until we're at the end. /
2458	while (p != pend)
2459	{
2460	PATFETCH (c);
2461
2462	switch (c)
2463	{
2464	case `'^'`:
2465	{
2466	if ( / If at start of pattern, it's an operator. /
2467	p == pattern + `1`
2468	/ If context independent, it's an operator. /
2469	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
2470	/ Otherwise, depends on what's come before. /
2471	\|\| PREFIX(at_begline_loc_p) (pattern, p, syntax))
2472	BUF_PUSH (begline);
2473	else
2474	goto normal_char;
2475	}
2476	break;
2477
2478
2479	case `'$'`:
2480	{
2481	if ( / If at end of pattern, it's an operator. /
2482	p == pend
2483	/ If context independent, it's an operator. /
2484	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
2485	/ Otherwise, depends on what's next. /
2486	\|\| PREFIX(at_endline_loc_p) (p, pend, syntax))
2487	BUF_PUSH (endline);
2488	else
2489	goto normal_char;
2490	}
2491	break;
2492
2493
2494	case `'+'`:
2495	case `'?'`:
2496	if ((syntax & RE_BK_PLUS_QM)
2497	\|\| (syntax & RE_LIMITED_OPS))
2498	goto normal_char;
2499	/ Fall through. /
2500	handle_plus:
2501	case `'*'`:
2502	/ If there is no previous pattern... /
2503	if (!laststart)
2504	{
2505	if (syntax & RE_CONTEXT_INVALID_OPS)
2506	FREE_STACK_RETURN (REG_BADRPT);
2507	else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2508	goto normal_char;
2509	}
2510
2511	{
2512	/ Are we optimizing this jump? /
2513	boolean keep_string_p = false;
2514
2515	/ 1 means zero (many) matches is allowed. /
2516	char zero_times_ok = `0`, many_times_ok = `0`;
2517
2518	/ If there is a sequence of repetition chars, collapse it*
2519	down to just one (the right one). We can't combine
2520	interval operators with these because of, e.g., `a{2}',*
2521	which should only match an even number of `a's. /*
2522
2523	for (;;)
2524	{
2525	zero_times_ok \|= c != `'+'`;
2526	many_times_ok \|= c != `'?'`;
2527
2528	if (p == pend)
2529	break;
2530
2531	PATFETCH (c);
2532
2533	if (c == `'*'`
2534	\|\| (!(syntax & RE_BK_PLUS_QM) && (c == `'+'` \|\| c == `'?'`)))
2535	;
2536
2537	else if (syntax & RE_BK_PLUS_QM && c == `'\\'`)
2538	{
2539	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2540
2541	PATFETCH (c1);
2542	if (!(c1 == `'+'` \|\| c1 == `'?'`))
2543	{
2544	PATUNFETCH;
2545	PATUNFETCH;
2546	break;
2547	}
2548
2549	c = c1;
2550	}
2551	else
2552	{
2553	PATUNFETCH;
2554	break;
2555	}
2556
2557	/ If we get here, we found another repeat character. /
2558	}
2559
2560	/ Star, etc. applied to an empty pattern is equivalent*
2561	to an empty pattern. /*
2562	if (!laststart)
2563	break;
2564
2565	/ Now we know whether or not zero matches is allowed*
2566	and also whether or not two or more matches is allowed. /*
2567	if (many_times_ok)
2568	{ / More than one repetition is allowed, so put in at the*
2569	end a backward relative jump from `b' to before the next
2570	jump we're going to put in below (which jumps from
2571	laststart to after this jump).
2572
2573	But if we are at the `' in the exact sequence `.\n',
2574	insert an unconditional jump backwards to the .,
2575	instead of the beginning of the loop. This way we only
2576	push a failure point once, instead of every time
2577	through the loop. /*
2578	assert (p - `1` > pattern);
2579
2580	/ Allocate the space for the jump. /
2581	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
2582
2583	/ We know we are not at the first character of the pattern,*
2584	because laststart was nonzero. And we've already
2585	incremented `p', by the way, to be the character after
2586	the `'. Do we have to do something analogous here*
2587	for null bytes, because of RE_DOT_NOT_NULL? /*
2588	if (TRANSLATE (*(p - `2`)) == TRANSLATE (`'.'`)
2589	&& zero_times_ok
2590	&& p < pend && TRANSLATE (*p) == TRANSLATE (`'\n'`)
2591	&& !(syntax & RE_DOT_NEWLINE))
2592	{ / We have .\n. /*
2593	STORE_JUMP (jump, b, laststart);
2594	keep_string_p = true;
2595	}
2596	else
2597	/ Anything else. /
2598	STORE_JUMP (maybe_pop_jump, b, laststart -
2599	(`1` + OFFSET_ADDRESS_SIZE));
2600
2601	/ We've added more stuff to the buffer. /
2602	b += `1` + OFFSET_ADDRESS_SIZE;
2603	}
2604
2605	/ On failure, jump from laststart to b + 3, which will be the*
2606	end of the buffer after this jump is inserted. /*
2607	/ ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of*
2608	'b + 3'. /*
2609	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
2610	INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2611	: on_failure_jump,
2612	laststart, b + `1` + OFFSET_ADDRESS_SIZE);
2613	pending_exact = `0`;
2614	b += `1` + OFFSET_ADDRESS_SIZE;
2615
2616	if (!zero_times_ok)
2617	{
2618	/ At least one repetition is required, so insert a*
2619	`dummy_failure_jump' before the initial
2620	`on_failure_jump' instruction of the loop. This
2621	effects a skip over that instruction the first time
2622	we hit that loop. /*
2623	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
2624	INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2625	`2` + `2` * OFFSET_ADDRESS_SIZE);
2626	b += `1` + OFFSET_ADDRESS_SIZE;
2627	}
2628	}
2629	break;
2630
2631
2632	case `'.'`:
2633	laststart = b;
2634	BUF_PUSH (anychar);
2635	break;
2636
2637
2638	case `'['`:
2639	{
2640	boolean had_char_class = false;
2641	#ifdef WCHAR
2642	CHAR_T range_start = `0xffffffff`;
2643	#else
2644	unsigned int range_start = `0xffffffff`;
2645	#endif
2646	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2647
2648	#ifdef WCHAR
2649	/ We assume a charset(_not) structure as a wchar_t array.*
2650	charset[0] = (re_opcode_t) charset(_not)
2651	charset[1] = l (= length of char_classes)
2652	charset[2] = m (= length of collating_symbols)
2653	charset[3] = n (= length of equivalence_classes)
2654	charset[4] = o (= length of char_ranges)
2655	charset[5] = p (= length of chars)
2656
2657	charset[6] = char_class (wctype_t)
2658	charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2659	...
2660	charset[l+5] = char_class (wctype_t)
2661
2662	charset[l+6] = collating_symbol (wchar_t)
2663	...
2664	charset[l+m+5] = collating_symbol (wchar_t)
2665	ifdef _LIBC we use the index if
2666	_NL_COLLATE_SYMB_EXTRAMB instead of
2667	wchar_t string.
2668
2669	charset[l+m+6] = equivalence_classes (wchar_t)
2670	...
2671	charset[l+m+n+5] = equivalence_classes (wchar_t)
2672	ifdef _LIBC we use the index in
2673	_NL_COLLATE_WEIGHT instead of
2674	wchar_t string.
2675
2676	charset[l+m+n+6] = range_start
2677	charset[l+m+n+7] = range_end
2678	...
2679	charset[l+m+n+2o+4] = range_start
2680	charset[l+m+n+2o+5] = range_end
2681	ifdef _LIBC we use the value looked up
2682	in _NL_COLLATE_COLLSEQ instead of
2683	wchar_t character.
2684
2685	charset[l+m+n+2o+6] = char
2686	...
2687	charset[l+m+n+2o+p+5] = char
2688
2689	*/
2690
2691	/ We need at least 6 spaces: the opcode, the length of*
2692	char_classes, the length of collating_symbols, the length of
2693	equivalence_classes, the length of char_ranges, the length of
2694	chars. /*
2695	GET_BUFFER_SPACE (`6`);
2696
2697	/ Save b as laststart. And We use laststart as the pointer*
2698	to the first element of the charset here.
2699	In other words, laststart[i] indicates charset[i]. /*
2700	laststart = b;
2701
2702	/ We test `p == '^' twice, instead of using an if
2703	statement, so we only need one BUF_PUSH. /*
2704	BUF_PUSH (*p == `'^'` ? charset_not : charset);
2705	if (*p == `'^'`)
2706	p++;
2707
2708	/ Push the length of char_classes, the length of*
2709	collating_symbols, the length of equivalence_classes, the
2710	length of char_ranges and the length of chars. /*
2711	BUF_PUSH_3 (`0`, `0`, `0`);
2712	BUF_PUSH_2 (`0`, `0`);
2713
2714	/ Remember the first position in the bracket expression. /
2715	p1 = p;
2716
2717	/ charset_not matches newline according to a syntax bit. /
2718	if ((re_opcode_t) b[-`6`] == charset_not
2719	&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2720	{
2721	BUF_PUSH(`'\n'`);
2722	laststart[`5`]++; / Update the length of characters /
2723	}
2724
2725	/ Read in characters and ranges, setting map bits. /
2726	for (;;)
2727	{
2728	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2729
2730	PATFETCH (c);
2731
2732	/ \ might escape characters inside [...] and [^...]. /
2733	if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == `'\\'`)
2734	{
2735	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2736
2737	PATFETCH (c1);
2738	BUF_PUSH(c1);
2739	laststart[`5`]++; / Update the length of chars /
2740	range_start = c1;
2741	continue;
2742	}
2743
2744	/ Could be the end of the bracket expression. If it's*
2745	not (i.e., when the bracket expression is `[]' so
2746	far), the ']' character bit gets set way below. /*
2747	if (c == `']'` && p != p1 + `1`)
2748	break;
2749
2750	/ Look ahead to see if it's a range when the last thing*
2751	was a character class. /*
2752	if (had_char_class && c == `'-'` && *p != `']'`)
2753	FREE_STACK_RETURN (REG_ERANGE);
2754
2755	/ Look ahead to see if it's a range when the last thing*
2756	was a character: if this is a hyphen not at the
2757	beginning or the end of a list, then it's the range
2758	operator. /*
2759	if (c == `'-'`
2760	&& !(p - `2` >= pattern && p[-`2`] == `'['`)
2761	&& !(p - `3` >= pattern && p[-`3`] == `'['` && p[-`2`] == `'^'`)
2762	&& *p != `']'`)
2763	{
2764	reg_errcode_t ret;
2765	/ Allocate the space for range_start and range_end. /
2766	GET_BUFFER_SPACE (`2`);
2767	/ Update the pointer to indicate end of buffer. /
2768	b += `2`;
2769	ret = wcs_compile_range (range_start, &p, pend, translate,
2770	syntax, b, laststart);
2771	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2772	range_start = `0xffffffff`;
2773	}
2774	else if (p[`0`] == `'-'` && p[`1`] != `']'`)
2775	{ / This handles ranges made up of characters only. /
2776	reg_errcode_t ret;
2777
2778	/ Move past the `-'. /
2779	PATFETCH (c1);
2780	/ Allocate the space for range_start and range_end. /
2781	GET_BUFFER_SPACE (`2`);
2782	/ Update the pointer to indicate end of buffer. /
2783	b += `2`;
2784	ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2785	laststart);
2786	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2787	range_start = `0xffffffff`;
2788	}
2789
2790	/ See if we're at the beginning of a possible character*
2791	class. /*
2792	else if (syntax & RE_CHAR_CLASSES && c == `'['` && *p == `':'`)
2793	{ / Leave room for the null. /
2794	char str[CHAR_CLASS_MAX_LENGTH + `1`];
2795
2796	PATFETCH (c);
2797	c1 = `0`;
2798
2799	/ If pattern is `[[:'. /
2800	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801
2802	for (;;)
2803	{
2804	PATFETCH (c);
2805	if ((c == `':'` && *p == `']'`) \|\| p == pend)
2806	break;
2807	if (c1 < CHAR_CLASS_MAX_LENGTH)
2808	str[c1++] = c;
2809	else
2810	/ This is in any case an invalid class name. /
2811	str[`0`] = `'\0'`;
2812	}
2813	str[c1] = `'\0'`;
2814
2815	/ If isn't a word bracketed by `[:' and `:]':*
2816	undo the ending character, the letters, and leave
2817	the leading `:' and `[' (but store them as character). /*
2818	if (c == `':'` && *p == `']'`)
2819	{
2820	wctype_t wt;
2821	uintptr_t alignedp;
2822
2823	/ Query the character class as wctype_t. /
2824	wt = IS_CHAR_CLASS (str);
2825	if (wt == `0`)
2826	FREE_STACK_RETURN (REG_ECTYPE);
2827
2828	/ Throw away the ] at the end of the character*
2829	class. /*
2830	PATFETCH (c);
2831
2832	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2833
2834	/ Allocate the space for character class. /
2835	GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2836	/ Update the pointer to indicate end of buffer. /
2837	b += CHAR_CLASS_SIZE;
2838	/ Move data which follow character classes*
2839	not to violate the data. /*
2840	insert_space(CHAR_CLASS_SIZE,
2841	laststart + `6` + laststart[`1`],
2842	b - `1`);
2843	alignedp = ((uintptr_t)(laststart + `6` + laststart[`1`])
2844	+ __alignof__(wctype_t) - `1`)
2845	& ~(uintptr_t)(__alignof__(wctype_t) - `1`);
2846	/ Store the character class. /
2847	((wctype_t)alignedp) = wt;
2848	/ Update length of char_classes /
2849	laststart[`1`] += CHAR_CLASS_SIZE;
2850
2851	had_char_class = true;
2852	}
2853	else
2854	{
2855	c1++;
2856	while (c1--)
2857	PATUNFETCH;
2858	BUF_PUSH (`'['`);
2859	BUF_PUSH (`':'`);
2860	laststart[`5`] += `2`; / Update the length of characters /
2861	range_start = `':'`;
2862	had_char_class = false;
2863	}
2864	}
2865	else if (syntax & RE_CHAR_CLASSES && c == `'['` && (*p == `'='`
2866	\|\| *p == `'.'`))
2867	{
2868	CHAR_T str[`128`]; / Should be large enough. /
2869	CHAR_T delim = p; /* '=' or '.' /
2870	# ifdef _LIBC
2871	uint32_t nrules =
2872	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2873	# endif
2874	PATFETCH (c);
2875	c1 = `0`;
2876
2877	/ If pattern is `[[=' or '[[.'. /
2878	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2879
2880	for (;;)
2881	{
2882	PATFETCH (c);
2883	if ((c == delim && *p == `']'`) \|\| p == pend)
2884	break;
2885	if (c1 < sizeof (str) - `1`)
2886	str[c1++] = c;
2887	else
2888	/ This is in any case an invalid class name. /
2889	str[`0`] = `'\0'`;
2890	}
2891	str[c1] = `'\0'`;
2892
2893	if (c == delim && *p == `']'` && str[`0`] != `'\0'`)
2894	{
2895	unsigned int i, offset;
2896	/ If we have no collation data we use the default*
2897	collation in which each character is in a class
2898	by itself. It also means that ASCII is the
2899	character set and therefore we cannot have character
2900	with more than one byte in the multibyte
2901	representation. /*
2902
2903	/ If not defined _LIBC, we push the name and*
2904	`\0' for the sake of matching performance. /*
2905	int datasize = c1 + `1`;
2906
2907	# ifdef _LIBC
2908	int32_t idx = `0`;
2909	if (nrules == `0`)
2910	# endif
2911	{
2912	if (c1 != `1`)
2913	FREE_STACK_RETURN (REG_ECOLLATE);
2914	}
2915	# ifdef _LIBC
2916	else
2917	{
2918	const int32_t *table;
2919	const int32_t *weights;
2920	const int32_t *extra;
2921	const int32_t *indirect;
2922	wint_t *cp;
2923
2924	/ This #include defines a local function! /
2925	# include <locale/weightwc.h>
2926
2927	if(delim == `'='`)
2928	{
2929	/ We push the index for equivalence class. /
2930	cp = (wint_t*)str;
2931
2932	table = (const int32_t *)
2933	_NL_CURRENT (LC_COLLATE,
2934	_NL_COLLATE_TABLEWC);
2935	weights = (const int32_t *)
2936	_NL_CURRENT (LC_COLLATE,
2937	_NL_COLLATE_WEIGHTWC);
2938	extra = (const int32_t *)
2939	_NL_CURRENT (LC_COLLATE,
2940	_NL_COLLATE_EXTRAWC);
2941	indirect = (const int32_t *)
2942	_NL_CURRENT (LC_COLLATE,
2943	_NL_COLLATE_INDIRECTWC);
2944
2945	idx = findidx ((const wint_t**)&cp);
2946	if (idx == `0` \|\| cp < (wint_t*) str + c1)
2947	/ This is no valid character. /
2948	FREE_STACK_RETURN (REG_ECOLLATE);
2949
2950	str[`0`] = (wchar_t)idx;
2951	}
2952	else / delim == '.' /
2953	{
2954	/ We push collation sequence value*
2955	for collating symbol. /*
2956	int32_t table_size;
2957	const int32_t *symb_table;
2958	const unsigned char *extra;
2959	int32_t idx;
2960	int32_t elem;
2961	int32_t second;
2962	int32_t hash;
2963	char char_str[c1];
2964
2965	/ We have to convert the name to a single-byte*
2966	string. This is possible since the names
2967	consist of ASCII characters and the internal
2968	representation is UCS4. /*
2969	for (i = `0`; i < c1; ++i)
2970	char_str[i] = str[i];
2971
2972	table_size =
2973	_NL_CURRENT_WORD (LC_COLLATE,
2974	_NL_COLLATE_SYMB_HASH_SIZEMB);
2975	symb_table = (const int32_t *)
2976	_NL_CURRENT (LC_COLLATE,
2977	_NL_COLLATE_SYMB_TABLEMB);
2978	extra = (const unsigned char *)
2979	_NL_CURRENT (LC_COLLATE,
2980	_NL_COLLATE_SYMB_EXTRAMB);
2981
2982	/ Locate the character in the hashing table. /
2983	hash = elem_hash (char_str, c1);
2984
2985	idx = `0`;
2986	elem = hash % table_size;
2987	second = hash % (table_size - `2`);
2988	while (symb_table[`2` * elem] != `0`)
2989	{
2990	/ First compare the hashing value. /
2991	if (symb_table[`2` * elem] == hash
2992	&& c1 == extra[symb_table[`2` * elem + `1`]]
2993	&& memcmp (char_str,
2994	&extra[symb_table[`2` * elem + `1`]
2995	+ `1`], c1) == `0`)
2996	{
2997	/ Yep, this is the entry. /
2998	idx = symb_table[`2` * elem + `1`];
2999	idx += `1` + extra[idx];
3000	break;
3001	}
3002
3003	/ Next entry. /
3004	elem += second;
3005	}
3006
3007	if (symb_table[`2` * elem] != `0`)
3008	{
3009	/ Compute the index of the byte sequence*
3010	in the table. /*
3011	idx += `1` + extra[idx];
3012	/ Adjust for the alignment. /
3013	idx = (idx + `3`) & ~`3`;
3014
3015	str[`0`] = (wchar_t) idx + `4`;
3016	}
3017	else if (symb_table[`2` * elem] == `0` && c1 == `1`)
3018	{
3019	/ No valid character. Match it as a*
3020	single byte character. /*
3021	had_char_class = false;
3022	BUF_PUSH(str[`0`]);
3023	/ Update the length of characters /
3024	laststart[`5`]++;
3025	range_start = str[`0`];
3026
3027	/ Throw away the ] at the end of the*
3028	collating symbol. /*
3029	PATFETCH (c);
3030	/ exit from the switch block. /
3031	continue;
3032	}
3033	else
3034	FREE_STACK_RETURN (REG_ECOLLATE);
3035	}
3036	datasize = `1`;
3037	}
3038	# endif
3039	/ Throw away the ] at the end of the equivalence*
3040	class (or collating symbol). /*
3041	PATFETCH (c);
3042
3043	/ Allocate the space for the equivalence class*
3044	(or collating symbol) (and '\0' if needed). /*
3045	GET_BUFFER_SPACE(datasize);
3046	/ Update the pointer to indicate end of buffer. /
3047	b += datasize;
3048
3049	if (delim == `'='`)
3050	{ / equivalence class /
3051	/ Calculate the offset of char_ranges,*
3052	which is next to equivalence_classes. /*
3053	offset = laststart[`1`] + laststart[`2`]
3054	+ laststart[`3`] +`6`;
3055	/ Insert space. /
3056	insert_space(datasize, laststart + offset, b - `1`);
3057
3058	/ Write the equivalence_class and \0. /
3059	for (i = `0` ; i < datasize ; i++)
3060	laststart[offset + i] = str[i];
3061
3062	/ Update the length of equivalence_classes. /
3063	laststart[`3`] += datasize;
3064	had_char_class = true;
3065	}
3066	else / delim == '.' /
3067	{ / collating symbol /
3068	/ Calculate the offset of the equivalence_classes,*
3069	which is next to collating_symbols. /*
3070	offset = laststart[`1`] + laststart[`2`] + `6`;
3071	/ Insert space and write the collating_symbol*
3072	and \0. /*
3073	insert_space(datasize, laststart + offset, b-`1`);
3074	for (i = `0` ; i < datasize ; i++)
3075	laststart[offset + i] = str[i];
3076
3077	/ In re_match_2_internal if range_start < -1, we*
3078	assume -range_start is the offset of the
3079	collating symbol which is specified as
3080	the character of the range start. So we assign
3081	-(laststart[1] + laststart[2] + 6) to
3082	range_start. /*
3083	range_start = -(laststart[`1`] + laststart[`2`] + `6`);
3084	/ Update the length of collating_symbol. /
3085	laststart[`2`] += datasize;
3086	had_char_class = false;
3087	}
3088	}
3089	else
3090	{
3091	c1++;
3092	while (c1--)
3093	PATUNFETCH;
3094	BUF_PUSH (`'['`);
3095	BUF_PUSH (delim);
3096	laststart[`5`] += `2`; / Update the length of characters /
3097	range_start = delim;
3098	had_char_class = false;
3099	}
3100	}
3101	else
3102	{
3103	had_char_class = false;
3104	BUF_PUSH(c);
3105	laststart[`5`]++; / Update the length of characters /
3106	range_start = c;
3107	}
3108	}
3109
3110	#else /* BYTE */
3111	/ Ensure that we have enough space to push a charset: the*
3112	opcode, the length count, and the bitset; 34 bytes in all. /*
3113	GET_BUFFER_SPACE (`34`);
3114
3115	laststart = b;
3116
3117	/ We test `*p == '^' twice, instead of using an if
3118	statement, so we only need one BUF_PUSH. /*
3119	BUF_PUSH (*p == `'^'` ? charset_not : charset);
3120	if (*p == `'^'`)
3121	p++;
3122
3123	/ Remember the first position in the bracket expression. /
3124	p1 = p;
3125
3126	/ Push the number of bytes in the bitmap. /
3127	BUF_PUSH ((`1` << BYTEWIDTH) / BYTEWIDTH);
3128
3129	/ Clear the whole map. /
3130	bzero (b, (`1` << BYTEWIDTH) / BYTEWIDTH);
3131
3132	/ charset_not matches newline according to a syntax bit. /
3133	if ((re_opcode_t) b[-`2`] == charset_not
3134	&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3135	SET_LIST_BIT (`'\n'`);
3136
3137	/ Read in characters and ranges, setting map bits. /
3138	for (;;)
3139	{
3140	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3141
3142	PATFETCH (c);
3143
3144	/ \ might escape characters inside [...] and [^...]. /
3145	if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == `'\\'`)
3146	{
3147	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3148
3149	PATFETCH (c1);
3150	SET_LIST_BIT (c1);
3151	range_start = c1;
3152	continue;
3153	}
3154
3155	/ Could be the end of the bracket expression. If it's*
3156	not (i.e., when the bracket expression is `[]' so
3157	far), the ']' character bit gets set way below. /*
3158	if (c == `']'` && p != p1 + `1`)
3159	break;
3160
3161	/ Look ahead to see if it's a range when the last thing*
3162	was a character class. /*
3163	if (had_char_class && c == `'-'` && *p != `']'`)
3164	FREE_STACK_RETURN (REG_ERANGE);
3165
3166	/ Look ahead to see if it's a range when the last thing*
3167	was a character: if this is a hyphen not at the
3168	beginning or the end of a list, then it's the range
3169	operator. /*
3170	if (c == `'-'`
3171	&& !(p - `2` >= pattern && p[-`2`] == `'['`)
3172	&& !(p - `3` >= pattern && p[-`3`] == `'['` && p[-`2`] == `'^'`)
3173	&& *p != `']'`)
3174	{
3175	reg_errcode_t ret
3176	= byte_compile_range (range_start, &p, pend, translate,
3177	syntax, b);
3178	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3179	range_start = `0xffffffff`;
3180	}
3181
3182	else if (p[`0`] == `'-'` && p[`1`] != `']'`)
3183	{ / This handles ranges made up of characters only. /
3184	reg_errcode_t ret;
3185
3186	/ Move past the `-'. /
3187	PATFETCH (c1);
3188
3189	ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3190	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3191	range_start = `0xffffffff`;
3192	}
3193
3194	/ See if we're at the beginning of a possible character*
3195	class. /*
3196
3197	else if (syntax & RE_CHAR_CLASSES && c == `'['` && *p == `':'`)
3198	{ / Leave room for the null. /
3199	char str[CHAR_CLASS_MAX_LENGTH + `1`];
3200
3201	PATFETCH (c);
3202	c1 = `0`;
3203
3204	/ If pattern is `[[:'. /
3205	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3206
3207	for (;;)
3208	{
3209	PATFETCH (c);
3210	if ((c == `':'` && *p == `']'`) \|\| p == pend)
3211	break;
3212	if (c1 < CHAR_CLASS_MAX_LENGTH)
3213	str[c1++] = c;
3214	else
3215	/ This is in any case an invalid class name. /
3216	str[`0`] = `'\0'`;
3217	}
3218	str[c1] = `'\0'`;
3219
3220	/ If it isn't a word bracketed by `[:' and `:]':*
3221	undo the ending character, the letters, and leave
3222	the leading `:' and `[' (but set bits for them). /*
3223	if (c == `':'` && *p == `']'`)
3224	{
3225	# if defined _LIBC \|\| WIDE_CHAR_SUPPORT
3226	boolean is_lower = STREQ (str, "lower");
3227	boolean is_upper = STREQ (str, "upper");
3228	wctype_t wt;
3229	int ch;
3230
3231	wt = IS_CHAR_CLASS (str);
3232	if (wt == `0`)
3233	FREE_STACK_RETURN (REG_ECTYPE);
3234
3235	/ Throw away the ] at the end of the character*
3236	class. /*
3237	PATFETCH (c);
3238
3239	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3240
3241	for (ch = `0`; ch < `1` << BYTEWIDTH; ++ch)
3242	{
3243	# ifdef _LIBC
3244	if (__iswctype (__btowc (ch), wt))
3245	SET_LIST_BIT (ch);
3246	# else
3247	if (iswctype (btowc (ch), wt))
3248	SET_LIST_BIT (ch);
3249	# endif
3250
3251	if (translate && (is_upper \|\| is_lower)
3252	&& (ISUPPER (ch) \|\| ISLOWER (ch)))
3253	SET_LIST_BIT (ch);
3254	}
3255
3256	had_char_class = true;
3257	# else
3258	int ch;
3259	boolean is_alnum = STREQ (str, "alnum");
3260	boolean is_alpha = STREQ (str, "alpha");
3261	boolean is_blank = STREQ (str, "blank");
3262	boolean is_cntrl = STREQ (str, "cntrl");
3263	boolean is_digit = STREQ (str, "digit");
3264	boolean is_graph = STREQ (str, "graph");
3265	boolean is_lower = STREQ (str, "lower");
3266	boolean is_print = STREQ (str, "print");
3267	boolean is_punct = STREQ (str, "punct");
3268	boolean is_space = STREQ (str, "space");
3269	boolean is_upper = STREQ (str, "upper");
3270	boolean is_xdigit = STREQ (str, "xdigit");
3271
3272	if (!IS_CHAR_CLASS (str))
3273	FREE_STACK_RETURN (REG_ECTYPE);
3274
3275	/ Throw away the ] at the end of the character*
3276	class. /*
3277	PATFETCH (c);
3278
3279	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3280
3281	for (ch = `0`; ch < `1` << BYTEWIDTH; ch++)
3282	{
3283	/ This was split into 3 if's to*
3284	avoid an arbitrary limit in some compiler. /*
3285	if ( (is_alnum && ISALNUM (ch))
3286	\|\| (is_alpha && ISALPHA (ch))
3287	\|\| (is_blank && ISBLANK (ch))
3288	\|\| (is_cntrl && ISCNTRL (ch)))
3289	SET_LIST_BIT (ch);
3290	if ( (is_digit && ISDIGIT (ch))
3291	\|\| (is_graph && ISGRAPH (ch))
3292	\|\| (is_lower && ISLOWER (ch))
3293	\|\| (is_print && ISPRINT (ch)))
3294	SET_LIST_BIT (ch);
3295	if ( (is_punct && ISPUNCT (ch))
3296	\|\| (is_space && ISSPACE (ch))
3297	\|\| (is_upper && ISUPPER (ch))
3298	\|\| (is_xdigit && ISXDIGIT (ch)))
3299	SET_LIST_BIT (ch);
3300	if ( translate && (is_upper \|\| is_lower)
3301	&& (ISUPPER (ch) \|\| ISLOWER (ch)))
3302	SET_LIST_BIT (ch);
3303	}
3304	had_char_class = true;
3305	# endif /* libc \|\| wctype.h */
3306	}
3307	else
3308	{
3309	c1++;
3310	while (c1--)
3311	PATUNFETCH;
3312	SET_LIST_BIT (`'['`);
3313	SET_LIST_BIT (`':'`);
3314	range_start = `':'`;
3315	had_char_class = false;
3316	}
3317	}
3318	else if (syntax & RE_CHAR_CLASSES && c == `'['` && *p == `'='`)
3319	{
3320	unsigned char str[MB_LEN_MAX + `1`];
3321	# ifdef _LIBC
3322	uint32_t nrules =
3323	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3324	# endif
3325
3326	PATFETCH (c);
3327	c1 = `0`;
3328
3329	/ If pattern is `[[='. /
3330	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3331
3332	for (;;)
3333	{
3334	PATFETCH (c);
3335	if ((c == `'='` && *p == `']'`) \|\| p == pend)
3336	break;
3337	if (c1 < MB_LEN_MAX)
3338	str[c1++] = c;
3339	else
3340	/ This is in any case an invalid class name. /
3341	str[`0`] = `'\0'`;
3342	}
3343	str[c1] = `'\0'`;
3344
3345	if (c == `'='` && *p == `']'` && str[`0`] != `'\0'`)
3346	{
3347	/ If we have no collation data we use the default*
3348	collation in which each character is in a class
3349	by itself. It also means that ASCII is the
3350	character set and therefore we cannot have character
3351	with more than one byte in the multibyte
3352	representation. /*
3353	# ifdef _LIBC
3354	if (nrules == `0`)
3355	# endif
3356	{
3357	if (c1 != `1`)
3358	FREE_STACK_RETURN (REG_ECOLLATE);
3359
3360	/ Throw away the ] at the end of the equivalence*
3361	class. /*
3362	PATFETCH (c);
3363
3364	/ Set the bit for the character. /
3365	SET_LIST_BIT (str[`0`]);
3366	}
3367	# ifdef _LIBC
3368	else
3369	{
3370	/ Try to match the byte sequence in `str' against*
3371	those known to the collate implementation.
3372	First find out whether the bytes in `str' are
3373	actually from exactly one character. /*
3374	const int32_t *table;
3375	const unsigned char *weights;
3376	const unsigned char *extra;
3377	const int32_t *indirect;
3378	int32_t idx;
3379	const unsigned char *cp = str;
3380	int ch;
3381
3382	/ This #include defines a local function! /
3383	# include <locale/weight.h>
3384
3385	table = (const int32_t *)
3386	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3387	weights = (const unsigned char *)
3388	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3389	extra = (const unsigned char *)
3390	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3391	indirect = (const int32_t *)
3392	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3393
3394	idx = findidx (&cp);
3395	if (idx == `0` \|\| cp < str + c1)
3396	/ This is no valid character. /
3397	FREE_STACK_RETURN (REG_ECOLLATE);
3398
3399	/ Throw away the ] at the end of the equivalence*
3400	class. /*
3401	PATFETCH (c);
3402
3403	/ Now we have to go through the whole table*
3404	and find all characters which have the same
3405	first level weight.
3406
3407	XXX Note that this is not entirely correct.
3408	we would have to match multibyte sequences
3409	but this is not possible with the current
3410	implementation. /*
3411	for (ch = `1`; ch < `256`; ++ch)
3412	/ XXX This test would have to be changed if we*
3413	would allow matching multibyte sequences. /*
3414	if (table[ch] > `0`)
3415	{
3416	int32_t idx2 = table[ch];
3417	size_t len = weights[idx2];
3418
3419	/ Test whether the lengths match. /
3420	if (weights[idx] == len)
3421	{
3422	/ They do. Now compare the bytes of*
3423	the weight. /*
3424	size_t cnt = `0`;
3425
3426	while (cnt < len
3427	&& (weights[idx + `1` + cnt]
3428	== weights[idx2 + `1` + cnt]))
3429	++cnt;
3430
3431	if (cnt == len)
3432	/ They match. Mark the character as*
3433	acceptable. /*
3434	SET_LIST_BIT (ch);
3435	}
3436	}
3437	}
3438	# endif
3439	had_char_class = true;
3440	}
3441	else
3442	{
3443	c1++;
3444	while (c1--)
3445	PATUNFETCH;
3446	SET_LIST_BIT (`'['`);
3447	SET_LIST_BIT (`'='`);
3448	range_start = `'='`;
3449	had_char_class = false;
3450	}
3451	}
3452	else if (syntax & RE_CHAR_CLASSES && c == `'['` && *p == `'.'`)
3453	{
3454	unsigned char str[`128`]; / Should be large enough. /
3455	# ifdef _LIBC
3456	uint32_t nrules =
3457	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3458	# endif
3459
3460	PATFETCH (c);
3461	c1 = `0`;
3462
3463	/ If pattern is `[[.'. /
3464	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3465
3466	for (;;)
3467	{
3468	PATFETCH (c);
3469	if ((c == `'.'` && *p == `']'`) \|\| p == pend)
3470	break;
3471	if (c1 < sizeof (str) - `1`)
3472	str[c1++] = c;
3473	else
3474	/ This is in any case an invalid class name. /
3475	str[`0`] = `'\0'`;
3476	}
3477	str[c1] = `'\0'`;
3478
3479	if (c == `'.'` && *p == `']'` && str[`0`] != `'\0'`)
3480	{
3481	/ If we have no collation data we use the default*
3482	collation in which each character is the name
3483	for its own class which contains only the one
3484	character. It also means that ASCII is the
3485	character set and therefore we cannot have character
3486	with more than one byte in the multibyte
3487	representation. /*
3488	# ifdef _LIBC
3489	if (nrules == `0`)
3490	# endif
3491	{
3492	if (c1 != `1`)
3493	FREE_STACK_RETURN (REG_ECOLLATE);
3494
3495	/ Throw away the ] at the end of the collating*
3496	symbol. /*
3497	PATFETCH (c);
3498
3499	/ Set the bit for the character. /
3500	SET_LIST_BIT (str[`0`]);
3501	range_start = ((const unsigned char *) str)[`0`];
3502	}
3503	# ifdef _LIBC
3504	else
3505	{
3506	/ Try to match the byte sequence in `str' against*
3507	those known to the collate implementation.
3508	First find out whether the bytes in `str' are
3509	actually from exactly one character. /*
3510	int32_t table_size;
3511	const int32_t *symb_table;
3512	const unsigned char *extra;
3513	int32_t idx;
3514	int32_t elem;
3515	int32_t second;
3516	int32_t hash;
3517
3518	table_size =
3519	_NL_CURRENT_WORD (LC_COLLATE,
3520	_NL_COLLATE_SYMB_HASH_SIZEMB);
3521	symb_table = (const int32_t *)
3522	_NL_CURRENT (LC_COLLATE,
3523	_NL_COLLATE_SYMB_TABLEMB);
3524	extra = (const unsigned char *)
3525	_NL_CURRENT (LC_COLLATE,
3526	_NL_COLLATE_SYMB_EXTRAMB);
3527
3528	/ Locate the character in the hashing table. /
3529	hash = elem_hash (str, c1);
3530
3531	idx = `0`;
3532	elem = hash % table_size;
3533	second = hash % (table_size - `2`);
3534	while (symb_table[`2` * elem] != `0`)
3535	{
3536	/ First compare the hashing value. /
3537	if (symb_table[`2` * elem] == hash
3538	&& c1 == extra[symb_table[`2` * elem + `1`]]
3539	&& memcmp (str,
3540	&extra[symb_table[`2` * elem + `1`]
3541	+ `1`],
3542	c1) == `0`)
3543	{
3544	/ Yep, this is the entry. /
3545	idx = symb_table[`2` * elem + `1`];
3546	idx += `1` + extra[idx];
3547	break;
3548	}
3549
3550	/ Next entry. /
3551	elem += second;
3552	}
3553
3554	if (symb_table[`2` * elem] == `0`)
3555	/ This is no valid character. /
3556	FREE_STACK_RETURN (REG_ECOLLATE);
3557
3558	/ Throw away the ] at the end of the collating*
3559	symbol. /*
3560	PATFETCH (c);
3561
3562	/ Now add the multibyte character(s) we found*
3563	to the accept list.
3564
3565	XXX Note that this is not entirely correct.
3566	we would have to match multibyte sequences
3567	but this is not possible with the current
3568	implementation. Also, we have to match
3569	collating symbols, which expand to more than
3570	one byte, as a whole and not allow the
3571	individual bytes. /*
3572	c1 = extra[idx++];
3573	if (c1 == `1`)
3574	range_start = extra[idx];
3575	while (c1-- > `0`)
3576	{
3577	SET_LIST_BIT (extra[idx]);
3578	++idx;
3579	}
3580	}
3581	# endif
3582	had_char_class = false;
3583	}
3584	else
3585	{
3586	c1++;
3587	while (c1--)
3588	PATUNFETCH;
3589	SET_LIST_BIT (`'['`);
3590	SET_LIST_BIT (`'.'`);
3591	range_start = `'.'`;
3592	had_char_class = false;
3593	}
3594	}
3595	else
3596	{
3597	had_char_class = false;
3598	SET_LIST_BIT (c);
3599	range_start = c;
3600	}
3601	}
3602
3603	/ Discard any (non)matching list bytes that are all 0 at the*
3604	end of the map. Decrease the map-length byte too. /*
3605	while ((int) b[-`1`] > `0` && b[b[-`1`] - `1`] == `0`)
3606	b[-`1`]--;
3607	b += b[-`1`];
3608	#endif /* WCHAR */
3609	}
3610	break;
3611
3612
3613	case `'('`:
3614	if (syntax & RE_NO_BK_PARENS)
3615	goto handle_open;
3616	else
3617	goto normal_char;
3618
3619
3620	case `')'`:
3621	if (syntax & RE_NO_BK_PARENS)
3622	goto handle_close;
3623	else
3624	goto normal_char;
3625
3626
3627	case `'\n'`:
3628	if (syntax & RE_NEWLINE_ALT)
3629	goto handle_alt;
3630	else
3631	goto normal_char;
3632
3633
3634	case `'\|'`:
3635	if (syntax & RE_NO_BK_VBAR)
3636	goto handle_alt;
3637	else
3638	goto normal_char;
3639
3640
3641	case `'{'`:
3642	if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3643	goto handle_interval;
3644	else
3645	goto normal_char;
3646
3647
3648	case `'\\'`:
3649	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3650
3651	/ Do not translate the character after the \, so that we can*
3652	distinguish, e.g., \B from \b, even if we normally would
3653	translate, e.g., B to b. /*
3654	PATFETCH_RAW (c);
3655
3656	switch (c)
3657	{
3658	case `'('`:
3659	if (syntax & RE_NO_BK_PARENS)
3660	goto normal_backslash;
3661
3662	handle_open:
3663	bufp->re_nsub++;
3664	regnum++;
3665
3666	if (COMPILE_STACK_FULL)
3667	{
3668	RETALLOC (compile_stack.stack, compile_stack.size << `1`,
3669	compile_stack_elt_t);
3670	if (compile_stack.stack == NULL) return REG_ESPACE;
3671
3672	compile_stack.size <<= `1`;
3673	}
3674
3675	/ These are the values to restore when we hit end of this*
3676	group. They are all relative offsets, so that if the
3677	whole pattern moves because of realloc, they will still
3678	be valid. /*
3679	COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3680	COMPILE_STACK_TOP.fixup_alt_jump
3681	= fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + `1` : `0`;
3682	COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3683	COMPILE_STACK_TOP.regnum = regnum;
3684
3685	/ We will eventually replace the 0 with the number of*
3686	groups inner to this one. But do not push a
3687	start_memory for groups beyond the last one we can
3688	represent in the compiled pattern. /*
3689	if (regnum <= MAX_REGNUM)
3690	{
3691	COMPILE_STACK_TOP.inner_group_offset = b
3692	- COMPILED_BUFFER_VAR + `2`;
3693	BUF_PUSH_3 (start_memory, regnum, `0`);
3694	}
3695
3696	compile_stack.avail++;
3697
3698	fixup_alt_jump = `0`;
3699	laststart = `0`;
3700	begalt = b;
3701	/ If we've reached MAX_REGNUM groups, then this open*
3702	won't actually generate any code, so we'll have to
3703	clear pending_exact explicitly. /*
3704	pending_exact = `0`;
3705	break;
3706
3707
3708	case `')'`:
3709	if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3710
3711	if (COMPILE_STACK_EMPTY)
3712	{
3713	if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3714	goto normal_backslash;
3715	else
3716	FREE_STACK_RETURN (REG_ERPAREN);
3717	}
3718
3719	handle_close:
3720	if (fixup_alt_jump)
3721	{ / Push a dummy failure point at the end of the*
3722	alternative for a possible future
3723	`pop_failure_jump' to pop. See comments at
3724	`push_dummy_failure' in `re_match_2'. /*
3725	BUF_PUSH (push_dummy_failure);
3726
3727	/ We allocated space for this jump when we assigned*
3728	to `fixup_alt_jump', in the `handle_alt' case below. /*
3729	STORE_JUMP (jump_past_alt, fixup_alt_jump, b - `1`);
3730	}
3731
3732	/ See similar code for backslashed left paren above. /
3733	if (COMPILE_STACK_EMPTY)
3734	{
3735	if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3736	goto normal_char;
3737	else
3738	FREE_STACK_RETURN (REG_ERPAREN);
3739	}
3740
3741	/ Since we just checked for an empty stack above, this*
3742	``can't happen''. /*
3743	assert (compile_stack.avail != `0`);
3744	{
3745	/ We don't just want to restore into `regnum', because*
3746	later groups should continue to be numbered higher,
3747	as in `(ab)c(de)' -- the second group is #2. /*
3748	regnum_t this_group_regnum;
3749
3750	compile_stack.avail--;
3751	begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3752	fixup_alt_jump
3753	= COMPILE_STACK_TOP.fixup_alt_jump
3754	? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - `1`
3755	: `0`;
3756	laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3757	this_group_regnum = COMPILE_STACK_TOP.regnum;
3758	/ If we've reached MAX_REGNUM groups, then this open*
3759	won't actually generate any code, so we'll have to
3760	clear pending_exact explicitly. /*
3761	pending_exact = `0`;
3762
3763	/ We're at the end of the group, so now we know how many*
3764	groups were inside this one. /*
3765	if (this_group_regnum <= MAX_REGNUM)
3766	{
3767	UCHAR_T *inner_group_loc
3768	= COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3769
3770	*inner_group_loc = regnum - this_group_regnum;
3771	BUF_PUSH_3 (stop_memory, this_group_regnum,
3772	regnum - this_group_regnum);
3773	}
3774	}
3775	break;
3776
3777
3778	case `'\|'`: / `\\|'. /
3779	if (syntax & RE_LIMITED_OPS \|\| syntax & RE_NO_BK_VBAR)
3780	goto normal_backslash;
3781	handle_alt:
3782	if (syntax & RE_LIMITED_OPS)
3783	goto normal_char;
3784
3785	/ Insert before the previous alternative a jump which*
3786	jumps to this alternative if the former fails. /*
3787	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
3788	INSERT_JUMP (on_failure_jump, begalt,
3789	b + `2` + `2` * OFFSET_ADDRESS_SIZE);
3790	pending_exact = `0`;
3791	b += `1` + OFFSET_ADDRESS_SIZE;
3792
3793	/ The alternative before this one has a jump after it*
3794	which gets executed if it gets matched. Adjust that
3795	jump so it will jump to this alternative's analogous
3796	jump (put in below, which in turn will jump to the next
3797	(if any) alternative's such jump, etc.). The last such
3798	jump jumps to the correct final destination. A picture:
3799	_____ _____
3800	\| \| \| \|
3801	\| v \| v
3802	a \| b \| c
3803
3804	If we are at `b', then fixup_alt_jump right now points to a
3805	three-byte space after `a'. We'll put in the jump, set
3806	fixup_alt_jump to right after `b', and leave behind three
3807	bytes which we'll fill in when we get to after `c'. /*
3808
3809	if (fixup_alt_jump)
3810	STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3811
3812	/ Mark and leave space for a jump after this alternative,*
3813	to be filled in later either by next alternative or
3814	when know we're at the end of a series of alternatives. /*
3815	fixup_alt_jump = b;
3816	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
3817	b += `1` + OFFSET_ADDRESS_SIZE;
3818
3819	laststart = `0`;
3820	begalt = b;
3821	break;
3822
3823
3824	case `'{'`:
3825	/ If \{ is a literal. /
3826	if (!(syntax & RE_INTERVALS)
3827	/ If we're at `\{' and it's not the open-interval*
3828	operator. /*
3829	\|\| (syntax & RE_NO_BK_BRACES))
3830	goto normal_backslash;
3831
3832	handle_interval:
3833	{
3834	/ If got here, then the syntax allows intervals. /
3835
3836	/ At least (most) this many matches must be made. /
3837	int lower_bound = -`1`, upper_bound = -`1`;
3838
3839	/ Place in the uncompiled pattern (i.e., just after*
3840	the '{') to go back to if the interval is invalid. /*
3841	const CHAR_T *beg_interval = p;
3842
3843	if (p == pend)
3844	goto invalid_interval;
3845
3846	GET_UNSIGNED_NUMBER (lower_bound);
3847
3848	if (c == `','`)
3849	{
3850	GET_UNSIGNED_NUMBER (upper_bound);
3851	if (upper_bound < `0`)
3852	upper_bound = RE_DUP_MAX;
3853	}
3854	else
3855	/ Interval such as `{1}' => match exactly once. /
3856	upper_bound = lower_bound;
3857
3858	if (! (`0` <= lower_bound && lower_bound <= upper_bound))
3859	goto invalid_interval;
3860
3861	if (!(syntax & RE_NO_BK_BRACES))
3862	{
3863	if (c != `'\\'` \|\| p == pend)
3864	goto invalid_interval;
3865	PATFETCH (c);
3866	}
3867
3868	if (c != `'}'`)
3869	goto invalid_interval;
3870
3871	/ If it's invalid to have no preceding re. /
3872	if (!laststart)
3873	{
3874	if (syntax & RE_CONTEXT_INVALID_OPS
3875	&& !(syntax & RE_INVALID_INTERVAL_ORD))
3876	FREE_STACK_RETURN (REG_BADRPT);
3877	else if (syntax & RE_CONTEXT_INDEP_OPS)
3878	laststart = b;
3879	else
3880	goto unfetch_interval;
3881	}
3882
3883	/ We just parsed a valid interval. /
3884
3885	if (RE_DUP_MAX < upper_bound)
3886	FREE_STACK_RETURN (REG_BADBR);
3887
3888	/ If the upper bound is zero, don't want to succeed at*
3889	all; jump from `laststart' to `b + 3', which will be
3890	the end of the buffer after we insert the jump. /*
3891	/ ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'*
3892	instead of 'b + 3'. /*
3893	if (upper_bound == `0`)
3894	{
3895	GET_BUFFER_SPACE (`1` + OFFSET_ADDRESS_SIZE);
3896	INSERT_JUMP (jump, laststart, b + `1`
3897	+ OFFSET_ADDRESS_SIZE);
3898	b += `1` + OFFSET_ADDRESS_SIZE;
3899	}
3900
3901	/ Otherwise, we have a nontrivial interval. When*
3902	we're all done, the pattern will look like:
3903	set_number_at <jump count> <upper bound>
3904	set_number_at <succeed_n count> <lower bound>
3905	succeed_n <after jump addr> <succeed_n count>
3906	<body of loop>
3907	jump_n <succeed_n addr> <jump count>
3908	(The upper bound and `jump_n' are omitted if
3909	`upper_bound' is 1, though.) /*
3910	else
3911	{ / If the upper bound is > 1, we need to insert*
3912	more at the end of the loop. /*
3913	unsigned nbytes = `2` + `4` * OFFSET_ADDRESS_SIZE +
3914	(upper_bound > `1`) * (`2` + `4` * OFFSET_ADDRESS_SIZE);
3915
3916	GET_BUFFER_SPACE (nbytes);
3917
3918	/ Initialize lower bound of the `succeed_n', even*
3919	though it will be set during matching by its
3920	attendant `set_number_at' (inserted next),
3921	because `re_compile_fastmap' needs to know.
3922	Jump to the `jump_n' we might insert below. /*
3923	INSERT_JUMP2 (succeed_n, laststart,
3924	b + `1` + `2` * OFFSET_ADDRESS_SIZE
3925	+ (upper_bound > `1`) * (`1` + `2` * OFFSET_ADDRESS_SIZE)
3926	, lower_bound);
3927	b += `1` + `2` * OFFSET_ADDRESS_SIZE;
3928
3929	/ Code to initialize the lower bound. Insert*
3930	before the `succeed_n'. The `5' is the last two
3931	bytes of this `set_number_at', plus 3 bytes of
3932	the following `succeed_n'. /*
3933	/ ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
3934	is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
3935	of the following `succeed_n'. /*
3936	PREFIX(insert_op2) (set_number_at, laststart, `1`
3937	+ `2` * OFFSET_ADDRESS_SIZE, lower_bound, b);
3938	b += `1` + `2` * OFFSET_ADDRESS_SIZE;
3939
3940	if (upper_bound > `1`)
3941	{ / More than one repetition is allowed, so*
3942	append a backward jump to the `succeed_n'
3943	that starts this interval.
3944
3945	When we've reached this during matching,
3946	we'll have matched the interval once, so
3947	jump back only `upper_bound - 1' times. /*
3948	STORE_JUMP2 (jump_n, b, laststart
3949	+ `2` * OFFSET_ADDRESS_SIZE + `1`,
3950	upper_bound - `1`);
3951	b += `1` + `2` * OFFSET_ADDRESS_SIZE;
3952
3953	/ The location we want to set is the second*
3954	parameter of the `jump_n'; that is `b-2' as
3955	an absolute address. `laststart' will be
3956	the `set_number_at' we're about to insert;
3957	`laststart+3' the number to set, the source
3958	for the relative address. But we are
3959	inserting into the middle of the pattern --
3960	so everything is getting moved up by 5.
3961	Conclusion: (b - 2) - (laststart + 3) + 5,
3962	i.e., b - laststart.
3963
3964	We insert this at the beginning of the loop
3965	so that if we fail during matching, we'll
3966	reinitialize the bounds. /*
3967	PREFIX(insert_op2) (set_number_at, laststart,
3968	b - laststart,
3969	upper_bound - `1`, b);
3970	b += `1` + `2` * OFFSET_ADDRESS_SIZE;
3971	}
3972	}
3973	pending_exact = `0`;
3974	break;
3975
3976	invalid_interval:
3977	if (!(syntax & RE_INVALID_INTERVAL_ORD))
3978	FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
3979	unfetch_interval:
3980	/ Match the characters as literals. /
3981	p = beg_interval;
3982	c = `'{'`;
3983	if (syntax & RE_NO_BK_BRACES)
3984	goto normal_char;
3985	else
3986	goto normal_backslash;
3987	}
3988
3989	#ifdef emacs
3990	/ There is no way to specify the before_dot and after_dot*
3991	operators. rms says this is ok. --karl /*
3992	case `'='`:
3993	BUF_PUSH (at_dot);
3994	break;
3995
3996	case `'s'`:
3997	laststart = b;
3998	PATFETCH (c);
3999	BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4000	break;
4001
4002	case `'S'`:
4003	laststart = b;
4004	PATFETCH (c);
4005	BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4006	break;
4007	#endif /* emacs */
4008
4009
4010	case `'w'`:
4011	if (syntax & RE_NO_GNU_OPS)
4012	goto normal_char;
4013	laststart = b;
4014	BUF_PUSH (wordchar);
4015	break;
4016
4017
4018	case `'W'`:
4019	if (syntax & RE_NO_GNU_OPS)
4020	goto normal_char;
4021	laststart = b;
4022	BUF_PUSH (notwordchar);
4023	break;
4024
4025
4026	case `'<'`:
4027	if (syntax & RE_NO_GNU_OPS)
4028	goto normal_char;
4029	BUF_PUSH (wordbeg);
4030	break;
4031
4032	case `'>'`:
4033	if (syntax & RE_NO_GNU_OPS)
4034	goto normal_char;
4035	BUF_PUSH (wordend);
4036	break;
4037
4038	case `'b'`:
4039	if (syntax & RE_NO_GNU_OPS)
4040	goto normal_char;
4041	BUF_PUSH (wordbound);
4042	break;
4043
4044	case `'B'`:
4045	if (syntax & RE_NO_GNU_OPS)
4046	goto normal_char;
4047	BUF_PUSH (notwordbound);
4048	break;
4049
4050	case '`':
4051	if (syntax & RE_NO_GNU_OPS)
4052	goto normal_char;
4053	BUF_PUSH (begbuf);
4054	break;
4055
4056	case `'\''`:
4057	if (syntax & RE_NO_GNU_OPS)
4058	goto normal_char;
4059	BUF_PUSH (endbuf);
4060	break;
4061
4062	case `'1'`: case `'2'`: case `'3'`: case `'4'`: case `'5'`:
4063	case `'6'`: case `'7'`: case `'8'`: case `'9'`:
4064	if (syntax & RE_NO_BK_REFS)
4065	goto normal_char;
4066
4067	c1 = c - `'0'`;
4068
4069	if (c1 > regnum)
4070	FREE_STACK_RETURN (REG_ESUBREG);
4071
4072	/ Can't back reference to a subexpression if inside of it. /
4073	if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4074	goto normal_char;
4075
4076	laststart = b;
4077	BUF_PUSH_2 (duplicate, c1);
4078	break;
4079
4080
4081	case `'+'`:
4082	case `'?'`:
4083	if (syntax & RE_BK_PLUS_QM)
4084	goto handle_plus;
4085	else
4086	goto normal_backslash;
4087
4088	default:
4089	normal_backslash:
4090	/ You might think it would be useful for \ to mean*
4091	not to translate; but if we don't translate it
4092	it will never match anything. /*
4093	c = TRANSLATE (c);
4094	goto normal_char;
4095	}
4096	break;
4097
4098
4099	default:
4100	/ Expects the character in `c'. /
4101	normal_char:
4102	/ If no exactn currently being built. /
4103	if (!pending_exact
4104	#ifdef WCHAR
4105	/ If last exactn handle binary(or character) and*
4106	new exactn handle character(or binary). /*
4107	\|\| is_exactn_bin != is_binary[p - `1` - pattern]
4108	#endif /* WCHAR */
4109
4110	/ If last exactn not at current position. /
4111	\|\| pending_exact + *pending_exact + `1` != b
4112
4113	/ We have only one byte following the exactn for the count. /
4114	\|\| *pending_exact == (`1` << BYTEWIDTH) - `1`
4115
4116	/ If followed by a repetition operator. /
4117	\|\| p == `''` \|\| *p == `'^'`
4118	\|\| ((syntax & RE_BK_PLUS_QM)
4119	? *p == `'\\'` && (p[`1`] == `'+'` \|\| p[`1`] == `'?'`)
4120	: (p == `'+'` \|\| p == `'?'`))
4121	\|\| ((syntax & RE_INTERVALS)
4122	&& ((syntax & RE_NO_BK_BRACES)
4123	? *p == `'{'`
4124	: (p[`0`] == `'\\'` && p[`1`] == `'{'`))))
4125	{
4126	/ Start building a new exactn. /
4127
4128	laststart = b;
4129
4130	#ifdef WCHAR
4131	/ Is this exactn binary data or character? /
4132	is_exactn_bin = is_binary[p - `1` - pattern];
4133	if (is_exactn_bin)
4134	BUF_PUSH_2 (exactn_bin, `0`);
4135	else
4136	BUF_PUSH_2 (exactn, `0`);
4137	#else
4138	BUF_PUSH_2 (exactn, `0`);
4139	#endif /* WCHAR */
4140	pending_exact = b - `1`;
4141	}
4142
4143	BUF_PUSH (c);
4144	(*pending_exact)++;
4145	break;
4146	} / switch (c) /
4147	} / while p != pend /
4148
4149
4150	/ Through the pattern now. /
4151
4152	if (fixup_alt_jump)
4153	STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4154
4155	if (!COMPILE_STACK_EMPTY)
4156	FREE_STACK_RETURN (REG_EPAREN);
4157
4158	/ If we don't want backtracking, force success*
4159	the first time we reach the end of the compiled pattern. /*
4160	if (syntax & RE_NO_POSIX_BACKTRACKING)
4161	BUF_PUSH (succeed);
4162
4163	#ifdef WCHAR
4164	free (pattern);
4165	free (mbs_offset);
4166	free (is_binary);
4167	#endif
4168	free (compile_stack.stack);
4169
4170	/ We have succeeded; set the length of the buffer. /
4171	#ifdef WCHAR
4172	bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4173	#else
4174	bufp->used = b - bufp->buffer;
4175	#endif
4176
4177	#ifdef DEBUG
4178	if (debug)
4179	{
4180	DEBUG_PRINT1 ("\nCompiled pattern: \n");
4181	PREFIX(print_compiled_pattern) (bufp);
4182	}
4183	#endif /* DEBUG */
4184
4185	#ifndef MATCH_MAY_ALLOCATE
4186	/ Initialize the failure stack to the largest possible stack. This*
4187	isn't necessary unless we're trying to avoid calling alloca in
4188	the search and match routines. /*
4189	{
4190	int num_regs = bufp->re_nsub + `1`;
4191
4192	/ Since DOUBLE_FAIL_STACK refuses to double only if the current size*
4193	is strictly greater than re_max_failures, the largest possible stack
4194	is 2 re_max_failures failure points. /
4195	if (fail_stack.size < (`2` * re_max_failures * MAX_FAILURE_ITEMS))
4196	{
4197	fail_stack.size = (`2` * re_max_failures * MAX_FAILURE_ITEMS);
4198
4199	# ifdef emacs
4200	if (! fail_stack.stack)
4201	fail_stack.stack
4202	= (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4203	* sizeof (PREFIX(fail_stack_elt_t)));
4204	else
4205	fail_stack.stack
4206	= (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4207	(fail_stack.size
4208	* sizeof (PREFIX(fail_stack_elt_t))));
4209	# else /* not emacs */
4210	if (! fail_stack.stack)
4211	fail_stack.stack
4212	= (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4213	* sizeof (PREFIX(fail_stack_elt_t)));
4214	else
4215	fail_stack.stack
4216	= (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4217	(fail_stack.size
4218	* sizeof (PREFIX(fail_stack_elt_t))));
4219	# endif /* not emacs */
4220	}
4221
4222	PREFIX(regex_grow_registers) (num_regs);
4223	}
4224	#endif /* not MATCH_MAY_ALLOCATE */
4225
4226	return REG_NOERROR;
4227	} / regex_compile /
4228
/* Subroutines for `regex_compile'.  */
4230
4231	/ Store OP at LOC followed by two-byte integer parameter ARG. /
4232	/ ifdef WCHAR, integer parameter is 1 wchar_t. /
4233
4234	static void
4235	PREFIX(store_op1) (re_opcode_t op, UCHAR_T loc, int* arg)
4236	{
4237	*loc = (UCHAR_T) op;
4238	STORE_NUMBER (loc + `1`, arg);
4239	}
4240
4241
4242	/ Like `store_op1', but for two two-byte parameters ARG1 and ARG2. /
4243	/ ifdef WCHAR, integer parameter is 1 wchar_t. /
4244
4245	static void
4246	PREFIX(store_op2) (re_opcode_t op, UCHAR_T loc, int* arg1, int arg2)
4247	{
4248	*loc = (UCHAR_T) op;
4249	STORE_NUMBER (loc + `1`, arg1);
4250	STORE_NUMBER (loc + `1` + OFFSET_ADDRESS_SIZE, arg2);
4251	}
4252
4253
4254	/ Copy the bytes from LOC to END to open up three bytes of space at LOC*
4255	for OP followed by two-byte integer parameter ARG. /*
4256	/ ifdef WCHAR, integer parameter is 1 wchar_t. /
4257
4258	static void
4259	PREFIX(insert_op1) (re_opcode_t op, UCHAR_T loc, int* arg, UCHAR_T *end)
4260	{
4261	register UCHAR_T *pfrom = end;
4262	register UCHAR_T *pto = end + `1` + OFFSET_ADDRESS_SIZE;
4263
4264	while (pfrom != loc)
4265	--pto = --pfrom;
4266
4267	PREFIX(store_op1) (op, loc, arg);
4268	}
4269
4270
4271	/ Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. /
4272	/ ifdef WCHAR, integer parameter is 1 wchar_t. /
4273
4274	static void
4275	PREFIX(insert_op2) (re_opcode_t op, UCHAR_T loc, int* arg1,
4276	int arg2, UCHAR_T *end)
4277	{
4278	register UCHAR_T *pfrom = end;
4279	register UCHAR_T pto = end + `1` + `2` OFFSET_ADDRESS_SIZE;
4280
4281	while (pfrom != loc)
4282	--pto = --pfrom;
4283
4284	PREFIX(store_op2) (op, loc, arg1, arg2);
4285	}
4286
4287
4288	/ P points to just after a ^ in PATTERN. Return true if that ^ comes*
4289	after an alternative or a begin-subexpression. We assume there is at
4290	least one character before the ^. /*
4291
4292	static boolean
4293	PREFIX(at_begline_loc_p) (const CHAR_T pattern, const* CHAR_T *p,
4294	reg_syntax_t syntax)
4295	{
4296	const CHAR_T *prev = p - `2`;
4297	boolean prev_prev_backslash = prev > pattern && prev[-`1`] == `'\\'`;
4298
4299	return
4300	/ After a subexpression? /
4301	(*prev == `'('` && (syntax & RE_NO_BK_PARENS \|\| prev_prev_backslash))
4302	/ After an alternative? /
4303	\|\| (*prev == `'\|'` && (syntax & RE_NO_BK_VBAR \|\| prev_prev_backslash));
4304	}
4305
4306
4307	/ The dual of at_begline_loc_p. This one is for $. We assume there is*
4308	at least one character after the $, i.e., `P < PEND'. /*
4309
4310	static boolean
4311	PREFIX(at_endline_loc_p) (const CHAR_T p, const* CHAR_T *pend,
4312	reg_syntax_t syntax)
4313	{
4314	const CHAR_T *next = p;
4315	boolean next_backslash = *next == `'\\'`;
4316	const CHAR_T *next_next = p + `1` < pend ? p + `1` : `0`;
4317
4318	return
4319	/ Before a subexpression? /
4320	(syntax & RE_NO_BK_PARENS ? *next == `')'`
4321	: next_backslash && next_next && *next_next == `')'`)
4322	/ Before an alternative? /
4323	\|\| (syntax & RE_NO_BK_VBAR ? *next == `'\|'`
4324	: next_backslash && next_next && *next_next == `'\|'`);
4325	}
4326
4327	#else /* not INSIDE_RECURSION */
4328
4329	/ Returns true if REGNUM is in one of COMPILE_STACK's elements and*
4330	false if it's not. /*
4331
4332	static boolean
4333	group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4334	{
4335	int this_element;
4336
4337	for (this_element = compile_stack.avail - `1`;
4338	this_element >= `0`;
4339	this_element--)
4340	if (compile_stack.stack[this_element].regnum == regnum)
4341	return true;
4342
4343	return false;
4344	}
4345	#endif /* not INSIDE_RECURSION */
4346
4347	#ifdef INSIDE_RECURSION
4348
4349	#ifdef WCHAR
4350	/ This insert space, which size is "num", into the pattern at "loc".*
4351	"end" must point the end of the allocated buffer. /*
4352	static void
4353	insert_space (int num, CHAR_T loc, CHAR_T end)
4354	{
4355	register CHAR_T *pto = end;
4356	register CHAR_T *pfrom = end - num;
4357
4358	while (pfrom >= loc)
4359	pto-- = pfrom--;
4360	}
4361	#endif /* WCHAR */
4362
4363	#ifdef WCHAR
4364	static reg_errcode_t
4365	wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
4366	const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
4367	reg_syntax_t syntax, CHAR_T b, CHAR_T char_set)
4368	{
4369	const CHAR_T p = p_ptr;
4370	CHAR_T range_start, range_end;
4371	reg_errcode_t ret;
4372	# ifdef _LIBC
4373	uint32_t nrules;
4374	uint32_t start_val, end_val;
4375	# endif
4376	if (p == pend)
4377	return REG_ERANGE;
4378
4379	# ifdef _LIBC
4380	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4381	if (nrules != `0`)
4382	{
4383	const char collseq = (const* char *) _NL_CURRENT(LC_COLLATE,
4384	_NL_COLLATE_COLLSEQWC);
4385	const unsigned char extra = (const* unsigned char *)
4386	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4387
4388	if (range_start_char < -`1`)
4389	{
4390	/ range_start is a collating symbol. /
4391	int32_t *wextra;
4392	/ Retreive the index and get collation sequence value. /
4393	wextra = (int32_t*)(extra + char_set[-range_start_char]);
4394	start_val = wextra[`1` + *wextra];
4395	}
4396	else
4397	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4398
4399	end_val = collseq_table_lookup (collseq, TRANSLATE (p[`0`]));
4400
4401	/ Report an error if the range is empty and the syntax prohibits*
4402	this. /*
4403	ret = ((syntax & RE_NO_EMPTY_RANGES)
4404	&& (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4405
4406	/ Insert space to the end of the char_ranges. /
4407	insert_space(`2`, b - char_set[`5`] - `2`, b - `1`);
4408	(b - char_set[`5`] - `2`) = (wchar_t*)start_val;
4409	(b - char_set[`5`] - `1`) = (wchar_t*)end_val;
4410	char_set[`4`]++; / ranges_index /
4411	}
4412	else
4413	# endif
4414	{
4415	range_start = (range_start_char >= `0`)? TRANSLATE (range_start_char):
4416	range_start_char;
4417	range_end = TRANSLATE (p[`0`]);
4418	/ Report an error if the range is empty and the syntax prohibits*
4419	this. /*
4420	ret = ((syntax & RE_NO_EMPTY_RANGES)
4421	&& (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4422
4423	/ Insert space to the end of the char_ranges. /
4424	insert_space(`2`, b - char_set[`5`] - `2`, b - `1`);
4425	*(b - char_set[`5`] - `2`) = range_start;
4426	*(b - char_set[`5`] - `1`) = range_end;
4427	char_set[`4`]++; / ranges_index /
4428	}
4429	/ Have to increment the pointer into the pattern string, so the*
4430	caller isn't still at the ending character. /*
4431	(*p_ptr)++;
4432
4433	return ret;
4434	}
4435	#else /* BYTE */
4436	/ Read the ending character of a range (in a bracket expression) from the*
4437	uncompiled pattern P_PTR (which ends at PEND). We assume the*
4438	starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4439	Then we set the translation of all bits between the starting and
4440	ending characters (inclusive) in the compiled pattern B.
4441
4442	Return an error code.
4443
4444	We use these short variable names so we can use the same macros as
4445	`regex_compile' itself. /*
4446
4447	static reg_errcode_t
4448	byte_compile_range (unsigned int range_start_char, const char **p_ptr,
4449	const char *pend, RE_TRANSLATE_TYPE translate,
4450	reg_syntax_t syntax, unsigned char *b)
4451	{
4452	unsigned this_char;
4453	const char p = p_ptr;
4454	reg_errcode_t ret;
4455	# if _LIBC
4456	const unsigned char *collseq;
4457	unsigned int start_colseq;
4458	unsigned int end_colseq;
4459	# else
4460	unsigned end_char;
4461	# endif
4462
4463	if (p == pend)
4464	return REG_ERANGE;
4465
4466	/ Have to increment the pointer into the pattern string, so the*
4467	caller isn't still at the ending character. /*
4468	(*p_ptr)++;
4469
4470	/ Report an error if the range is empty and the syntax prohibits this. /
4471	ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4472
4473	# if _LIBC
4474	collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4475	_NL_COLLATE_COLLSEQMB);
4476
4477	start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4478	end_colseq = collseq[(unsigned char) TRANSLATE (p[`0`])];
4479	for (this_char = `0`; this_char <= (unsigned char) -`1`; ++this_char)
4480	{
4481	unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4482
4483	if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4484	{
4485	SET_LIST_BIT (TRANSLATE (this_char));
4486	ret = REG_NOERROR;
4487	}
4488	}
4489	# else
4490	/ Here we see why `this_char' has to be larger than an `unsigned*
4491	char' -- we would otherwise go into an infinite loop, since all
4492	characters <= 0xff. /*
4493	range_start_char = TRANSLATE (range_start_char);
4494	/ TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,*
4495	and some compilers cast it to int implicitly, so following for_loop
4496	may fall to (almost) infinite loop.
4497	e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4498	To avoid this, we cast p[0] to unsigned int and truncate it. /*
4499	end_char = ((unsigned)TRANSLATE(p[`0`]) & ((`1` << BYTEWIDTH) - `1`));
4500
4501	for (this_char = range_start_char; this_char <= end_char; ++this_char)
4502	{
4503	SET_LIST_BIT (TRANSLATE (this_char));
4504	ret = REG_NOERROR;
4505	}
4506	# endif
4507
4508	return ret;
4509	}
4510	#endif /* WCHAR */
4511
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   characters can start a string that matches the pattern.  This fastmap
   is used by re_search to skip quickly over impossible starting points.

   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   area as BUFP->fastmap.

   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   the pattern buffer.

   Returns 0 if we succeed, -2 if an internal error.  */
4524
4525	#ifdef WCHAR
4526	/ local function for re_compile_fastmap.*
4527	truncate wchar_t character to char. /*
4528	static unsigned char truncate_wchar (CHAR_T c);
4529
4530	static unsigned char
4531	truncate_wchar (CHAR_T c)
4532	{
4533	unsigned char buf[MB_CUR_MAX];
4534	mbstate_t state;
4535	int retval;
4536	memset (&state, `'\0'`, sizeof (state));
4537	# ifdef _LIBC
4538	retval = __wcrtomb (buf, c, &state);
4539	# else
4540	retval = wcrtomb (buf, c, &state);
4541	# endif
4542	return retval > `0` ? buf[`0`] : (unsigned char) c;
4543	}
4544	#endif /* WCHAR */
4545
4546	static int
4547	PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
4548	{
4549	int j, k;
4550	#ifdef MATCH_MAY_ALLOCATE
4551	PREFIX(fail_stack_type) fail_stack;
4552	#endif
4553	#ifndef REGEX_MALLOC
4554	char *destination;
4555	#endif
4556
4557	register char *fastmap = bufp->fastmap;
4558
4559	#ifdef WCHAR
4560	/ We need to cast pattern to (wchar_t), because we casted this compiled
4561	pattern to (char) in regex_compile. /
4562	UCHAR_T pattern = (UCHAR_T)bufp->buffer;
4563	register UCHAR_T pend = (UCHAR_T) (bufp->buffer + bufp->used);
4564	#else /* BYTE */
4565	UCHAR_T *pattern = bufp->buffer;
4566	register UCHAR_T *pend = pattern + bufp->used;
4567	#endif /* WCHAR */
4568	UCHAR_T *p = pattern;
4569
4570	#ifdef REL_ALLOC
4571	/ This holds the pointer to the failure stack, when*
4572	it is allocated relocatably. /*
4573	fail_stack_elt_t *failure_stack_ptr;
4574	#endif
4575
4576	/ Assume that each path through the pattern can be null until*
4577	proven otherwise. We set this false at the bottom of switch
4578	statement, to which we get only if a particular path doesn't
4579	match the empty string. /*
4580	boolean path_can_be_null = true;
4581
4582	/ We aren't doing a `succeed_n' to begin with. /
4583	boolean succeed_n_p = false;
4584
4585	assert (fastmap != NULL && p != NULL);
4586
4587	INIT_FAIL_STACK ();
4588	bzero (fastmap, `1` << BYTEWIDTH); / Assume nothing's valid. /
4589	bufp->fastmap_accurate = `1`; / It will be when we're done. /
4590	bufp->can_be_null = `0`;
4591
4592	while (`1`)
4593	{
4594	if (p == pend \|\| *p == (UCHAR_T) succeed)
4595	{
4596	/ We have reached the (effective) end of pattern. /
4597	if (!FAIL_STACK_EMPTY ())
4598	{
4599	bufp->can_be_null \|= path_can_be_null;
4600
4601	/ Reset for next path. /
4602	path_can_be_null = true;
4603
4604	p = fail_stack.stack[--fail_stack.avail].pointer;
4605
4606	continue;
4607	}
4608	else
4609	break;
4610	}
4611
4612	/ We should never be about to go beyond the end of the pattern. /
4613	assert (p < pend);
4614
4615	switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4616	{
4617
4618	/ I guess the idea here is to simply not bother with a fastmap*
4619	if a backreference is used, since it's too hard to figure out
4620	the fastmap for the corresponding group. Setting
4621	`can_be_null' stops `re_search_2' from using the fastmap, so
4622	that is all we do. /*
4623	case duplicate:
4624	bufp->can_be_null = `1`;
4625	goto done;
4626
4627
4628	/ Following are the cases which match a character. These end*
4629	with `break'. /*
4630
4631	#ifdef WCHAR
4632	case exactn:
4633	fastmap[truncate_wchar(p[`1`])] = `1`;
4634	break;
4635	#else /* BYTE */
4636	case exactn:
4637	fastmap[p[`1`]] = `1`;
4638	break;
4639	#endif /* WCHAR */
4640	#ifdef MBS_SUPPORT
4641	case exactn_bin:
4642	fastmap[p[`1`]] = `1`;
4643	break;
4644	#endif
4645
4646	#ifdef WCHAR
4647	/ It is hard to distinguish fastmap from (multi byte) characters*
4648	which depends on current locale. /*
4649	case charset:
4650	case charset_not:
4651	case wordchar:
4652	case notwordchar:
4653	bufp->can_be_null = `1`;
4654	goto done;
4655	#else /* BYTE */
4656	case charset:
4657	for (j = p++ BYTEWIDTH - `1`; j >= `0`; j--)
4658	if (p[j / BYTEWIDTH] & (`1` << (j % BYTEWIDTH)))
4659	fastmap[j] = `1`;
4660	break;
4661
4662
4663	case charset_not:
4664	/ Chars beyond end of map must be allowed. /
4665	for (j = p BYTEWIDTH; j < (`1` << BYTEWIDTH); j++)
4666	fastmap[j] = `1`;
4667
4668	for (j = p++ BYTEWIDTH - `1`; j >= `0`; j--)
4669	if (!(p[j / BYTEWIDTH] & (`1` << (j % BYTEWIDTH))))
4670	fastmap[j] = `1`;
4671	break;
4672
4673
4674	case wordchar:
4675	for (j = `0`; j < (`1` << BYTEWIDTH); j++)
4676	if (SYNTAX (j) == Sword)
4677	fastmap[j] = `1`;
4678	break;
4679
4680
4681	case notwordchar:
4682	for (j = `0`; j < (`1` << BYTEWIDTH); j++)
4683	if (SYNTAX (j) != Sword)
4684	fastmap[j] = `1`;
4685	break;
4686	#endif /* WCHAR */
4687
4688	case anychar:
4689	{
4690	int fastmap_newline = fastmap[`'\n'`];
4691
4692	/ `.' matches anything ... /
4693	for (j = `0`; j < (`1` << BYTEWIDTH); j++)
4694	fastmap[j] = `1`;
4695
4696	/ ... except perhaps newline. /
4697	if (!(bufp->syntax & RE_DOT_NEWLINE))
4698	fastmap[`'\n'`] = fastmap_newline;
4699
4700	/ Return if we have already set `can_be_null'; if we have,*
4701	then the fastmap is irrelevant. Something's wrong here. /*
4702	else if (bufp->can_be_null)
4703	goto done;
4704
4705	/ Otherwise, have to check alternative paths. /
4706	break;
4707	}
4708
4709	#ifdef emacs
4710	case syntaxspec:
4711	k = *p++;
4712	for (j = `0`; j < (`1` << BYTEWIDTH); j++)
4713	if (SYNTAX (j) == (enum syntaxcode) k)
4714	fastmap[j] = `1`;
4715	break;
4716
4717
4718	case notsyntaxspec:
4719	k = *p++;
4720	for (j = `0`; j < (`1` << BYTEWIDTH); j++)
4721	if (SYNTAX (j) != (enum syntaxcode) k)
4722	fastmap[j] = `1`;
4723	break;
4724
4725
4726	/ All cases after this match the empty string. These end with*
4727	`continue'. /*
4728
4729
4730	case before_dot:
4731	case at_dot:
4732	case after_dot:
4733	continue;
4734	#endif /* emacs */
4735
4736
4737	case no_op:
4738	case begline:
4739	case endline:
4740	case begbuf:
4741	case endbuf:
4742	case wordbound:
4743	case notwordbound:
4744	case wordbeg:
4745	case wordend:
4746	case push_dummy_failure:
4747	continue;
4748
4749
4750	case jump_n:
4751	case pop_failure_jump:
4752	case maybe_pop_jump:
4753	case jump:
4754	case jump_past_alt:
4755	case dummy_failure_jump:
4756	EXTRACT_NUMBER_AND_INCR (j, p);
4757	p += j;
4758	if (j > `0`)
4759	continue;
4760
4761	/ Jump backward implies we just went through the body of a*
4762	loop and matched nothing. Opcode jumped to should be
4763	`on_failure_jump' or `succeed_n'. Just treat it like an
4764	ordinary jump. For a loop, it has pushed its failure*
4765	point already; if so, discard that as redundant. /*
4766	if ((re_opcode_t) *p != on_failure_jump
4767	&& (re_opcode_t) *p != succeed_n)
4768	continue;
4769
4770	p++;
4771	EXTRACT_NUMBER_AND_INCR (j, p);
4772	p += j;
4773
4774	/ If what's on the stack is where we are now, pop it. /
4775	if (!FAIL_STACK_EMPTY ()
4776	&& fail_stack.stack[fail_stack.avail - `1`].pointer == p)
4777	fail_stack.avail--;
4778
4779	continue;
4780
4781
4782	case on_failure_jump:
4783	case on_failure_keep_string_jump:
4784	handle_on_failure_jump:
4785	EXTRACT_NUMBER_AND_INCR (j, p);
4786
4787	/ For some patterns, e.g., `(a?)?', `p+j' here points to the*
4788	end of the pattern. We don't want to push such a point,
4789	since when we restore it above, entering the switch will
4790	increment `p' past the end of the pattern. We don't need
4791	to push such a point since we obviously won't find any more
4792	fastmap entries beyond `pend'. Such a pattern can match
4793	the null string, though. /*
4794	if (p + j < pend)
4795	{
4796	if (!PUSH_PATTERN_OP (p + j, fail_stack))
4797	{
4798	RESET_FAIL_STACK ();
4799	return -`2`;
4800	}
4801	}
4802	else
4803	bufp->can_be_null = `1`;
4804
4805	if (succeed_n_p)
4806	{
4807	EXTRACT_NUMBER_AND_INCR (k, p); / Skip the n. /
4808	succeed_n_p = false;
4809	}
4810
4811	continue;
4812
4813
4814	case succeed_n:
4815	/ Get to the number of times to succeed. /
4816	p += OFFSET_ADDRESS_SIZE;
4817
4818	/ Increment p past the n for when k != 0. /
4819	EXTRACT_NUMBER_AND_INCR (k, p);
4820	if (k == `0`)
4821	{
4822	p -= `2` * OFFSET_ADDRESS_SIZE;
4823	succeed_n_p = true; / Spaghetti code alert. /
4824	goto handle_on_failure_jump;
4825	}
4826	continue;
4827
4828
4829	case set_number_at:
4830	p += `2` * OFFSET_ADDRESS_SIZE;
4831	continue;
4832
4833
4834	case start_memory:
4835	case stop_memory:
4836	p += `2`;
4837	continue;
4838
4839
4840	default:
4841	abort (); / We have listed all the cases. /
4842	} / switch p++ /*
4843
4844	/ Getting here means we have found the possible starting*
4845	characters for one path of the pattern -- and that the empty
4846	string does not match. We need not follow this path further.
4847	Instead, look at the next alternative (remembered on the
4848	stack), or quit if no more. The test at the top of the loop
4849	does these things. /*
4850	path_can_be_null = false;
4851	p = pend;
4852	} / while p /
4853
4854	/ Set `can_be_null' for the last path (also the first path, if the*
4855	pattern is empty). /*
4856	bufp->can_be_null \|= path_can_be_null;
4857
4858	done:
4859	RESET_FAIL_STACK ();
4860	return `0`;
4861	}
4862
4863	#else /* not INSIDE_RECURSION */
4864
/* Public entry point: compute the fastmap for the compiled pattern in
   BUFP and return the result of the implementation that was used.

   With MBS_SUPPORT and a multibyte locale (MB_CUR_MAX != 1) the
   wide-character implementation is used; otherwise the byte one.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap(bufp);
  else
# endif
    return byte_re_compile_fastmap(bufp);
} /* re_compile_fastmap */
4875	#ifdef _LIBC
4876	weak_alias (__re_compile_fastmap, re_compile_fastmap)
4877	#endif
4878
4879
4880	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
4881	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
4882	this memory for recording register information. STARTS and ENDS
4883	must be allocated using the malloc library routine, and must each
4884	be at least NUM_REGS sizeof (regoff_t) bytes long.*
4885
4886	If NUM_REGS == 0, then subsequent matches should allocate their own
4887	register data.
4888
4889	Unless this function is called, the first search or match using
4890	PATTERN_BUFFER will allocate its own register data, without
4891	freeing the old data. /*
4892
4893	void
4894	re_set_registers (struct re_pattern_buffer *bufp,
4895	struct re_registers regs, unsigned* num_regs,
4896	regoff_t starts, regoff_t ends)
4897	{
4898	if (num_regs)
4899	{
4900	bufp->regs_allocated = REGS_REALLOCATE;
4901	regs->num_regs = num_regs;
4902	regs->start = starts;
4903	regs->end = ends;
4904	}
4905	else
4906	{
4907	bufp->regs_allocated = REGS_UNALLOCATED;
4908	regs->num_regs = `0`;
4909	regs->start = regs->end = (regoff_t *) `0`;
4910	}
4911	}
4912	#ifdef _LIBC
4913	weak_alias (__re_set_registers, re_set_registers)
4914	#endif
4915
/* Searching routines.  */
4917
/* Like re_search_2, below, but only one string is specified, and
   doesn't let you say where to stop matching.

   STRING (of length SIZE) is passed as the second string of
   re_search_2, with an empty first string, and SIZE is used as the stop
   index.  Returns whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                      regs, size);
}
4928	#ifdef _LIBC
4929	weak_alias (__re_search, re_search)
4930	#endif
4931
4932
/* Using the compiled pattern in BUFP->buffer, first tries to match the
   virtual concatenation of STRING1 and STRING2, starting first at index
   STARTPOS, then at STARTPOS + 1, and so on.

   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.

   RANGE is how far to scan while trying to match.  RANGE = 0 means try
   only at STARTPOS; in general, the last start tried is STARTPOS +
   RANGE.

   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire BUFP->buffer and its contained
   subexpressions.

   Do not consider matching one past the index STOP in the virtual
   concatenation of STRING1 and STRING2.

   We return either the position in the strings at which the match was
   found, -1 if no match, or -2 if error (such as failure
   stack overflow).

   This wrapper only dispatches: with MBS_SUPPORT and MB_CUR_MAX != 1 it
   calls the wide-character implementation, otherwise the byte one.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                            range, regs, stop);
  else
# endif
    return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                             range, regs, stop);
} /* re_search_2 */
4968	#ifdef _LIBC
4969	weak_alias (__re_search_2, re_search_2)
4970	#endif
4971
4972	#endif /* not INSIDE_RECURSION */
4973
4974	#ifdef INSIDE_RECURSION
4975
/* Release VAR and reset it to NULL.

   NOTE: both definitions expand to two statements and are not wrapped
   in `do { ... } while (0)'.  When FREE_VAR is used as the body of an
   unbraced `if' or `else', only the first statement is governed by the
   condition and `var = NULL' always runs.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
#else
# define FREE_VAR(var) free (var); var = NULL
#endif

#ifdef WCHAR
/* Size threshold (compared against the byte string sizes) above which
   the wide-character work buffers are obtained with TALLOC rather than
   REGEX_TALLOC.  */
# define MAX_ALLOCA_SIZE	2000

/* Free the wide-character copies of string1/string2 and their offset
   tables.  Each pair is released with the routine matching the way it
   was allocated, chosen by comparing size1/size2 with MAX_ALLOCA_SIZE.
   Uses the local variables of the function it is expanded in.  */
# define FREE_WCS_BUFFERS() \
  do {									\
    if (size1 > MAX_ALLOCA_SIZE)					\
      {									\
	free (wcs_string1);						\
	free (mbs_offset1);						\
      }									\
    else								\
      {									\
	FREE_VAR (wcs_string1);						\
	FREE_VAR (mbs_offset1);						\
      }									\
    if (size2 > MAX_ALLOCA_SIZE) 					\
      {									\
	free (wcs_string2);						\
	free (mbs_offset2);						\
      }									\
    else								\
      {									\
	FREE_VAR (wcs_string2);						\
	FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)

#endif
5010
5011
5012	static int
5013	PREFIX(re_search_2) (struct re_pattern_buffer bufp, const* char *string1,
5014	int size1, const char string2, int* size2,
5015	int startpos, int range,
5016	struct re_registers regs, int* stop)
5017	{
5018	int val;
5019	register char *fastmap = bufp->fastmap;
5020	register RE_TRANSLATE_TYPE translate = bufp->translate;
5021	int total_size = size1 + size2;
5022	int endpos = startpos + range;
5023	#ifdef WCHAR
5024	/ We need wchar_t* buffers correspond to cstring1, cstring2. /
5025	wchar_t wcs_string1 = NULL, wcs_string2 = NULL;
5026	/ We need the size of wchar_t buffers correspond to csize1, csize2. /
5027	int wcs_size1 = `0`, wcs_size2 = `0`;
5028	/ offset buffer for optimizatoin. See convert_mbs_to_wc. /
5029	int mbs_offset1 = NULL, mbs_offset2 = NULL;
5030	/ They hold whether each wchar_t is binary data or not. /
5031	char *is_binary = NULL;
5032	#endif /* WCHAR */
5033
5034	/ Check for out-of-range STARTPOS. /
5035	if (startpos < `0` \|\| startpos > total_size)
5036	return -`1`;
5037
5038	/ Fix up RANGE if it might eventually take us outside*
5039	the virtual concatenation of STRING1 and STRING2.
5040	Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. /*
5041	if (endpos < `0`)
5042	range = `0` - startpos;
5043	else if (endpos > total_size)
5044	range = total_size - startpos;
5045
5046	/ If the search isn't to be a backwards one, don't waste time in a*
5047	search for a pattern that must be anchored. /*
5048	if (bufp->used > `0` && range > `0`
5049	&& ((re_opcode_t) bufp->buffer[`0`] == begbuf
5050	/ `begline' is like `begbuf' if it cannot match at newlines. /
5051	\|\| ((re_opcode_t) bufp->buffer[`0`] == begline
5052	&& !bufp->newline_anchor)))
5053	{
5054	if (startpos > `0`)
5055	return -`1`;
5056	else
5057	range = `1`;
5058	}
5059
5060	#ifdef emacs
5061	/ In a forward search for something that starts with \=.*
5062	don't keep searching past point. /*
5063	if (bufp->used > `0` && (re_opcode_t) bufp->buffer[`0`] == at_dot && range > `0`)
5064	{
5065	range = PT - startpos;
5066	if (range <= `0`)
5067	return -`1`;
5068	}
5069	#endif /* emacs */
5070
5071	/ Update the fastmap now if not correct already. /
5072	if (fastmap && !bufp->fastmap_accurate)
5073	if (re_compile_fastmap (bufp) == -`2`)
5074	return -`2`;
5075
5076	#ifdef WCHAR
5077	/ Allocate wchar_t array for wcs_string1 and wcs_string2 and*
5078	fill them with converted string. /*
5079	if (size1 != `0`)
5080	{
5081	if (size1 > MAX_ALLOCA_SIZE)
5082	{
5083	wcs_string1 = TALLOC (size1 + `1`, CHAR_T);
5084	mbs_offset1 = TALLOC (size1 + `1`, int);
5085	is_binary = TALLOC (size1 + `1`, char);
5086	}
5087	else
5088	{
5089	wcs_string1 = REGEX_TALLOC (size1 + `1`, CHAR_T);
5090	mbs_offset1 = REGEX_TALLOC (size1 + `1`, int);
5091	is_binary = REGEX_TALLOC (size1 + `1`, char);
5092	}
5093	if (!wcs_string1 \|\| !mbs_offset1 \|\| !is_binary)
5094	{
5095	if (size1 > MAX_ALLOCA_SIZE)
5096	{
5097	free (wcs_string1);
5098	free (mbs_offset1);
5099	free (is_binary);
5100	}
5101	else
5102	{
5103	FREE_VAR (wcs_string1);
5104	FREE_VAR (mbs_offset1);
5105	FREE_VAR (is_binary);
5106	}
5107	return -`2`;
5108	}
5109	wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5110	mbs_offset1, is_binary);
5111	wcs_string1[wcs_size1] = L`'\0'`; / for a sentinel /
5112	if (size1 > MAX_ALLOCA_SIZE)
5113	free (is_binary);
5114	else
5115	FREE_VAR (is_binary);
5116	}
5117	if (size2 != `0`)
5118	{
5119	if (size2 > MAX_ALLOCA_SIZE)
5120	{
5121	wcs_string2 = TALLOC (size2 + `1`, CHAR_T);
5122	mbs_offset2 = TALLOC (size2 + `1`, int);
5123	is_binary = TALLOC (size2 + `1`, char);
5124	}
5125	else
5126	{
5127	wcs_string2 = REGEX_TALLOC (size2 + `1`, CHAR_T);
5128	mbs_offset2 = REGEX_TALLOC (size2 + `1`, int);
5129	is_binary = REGEX_TALLOC (size2 + `1`, char);
5130	}
5131	if (!wcs_string2 \|\| !mbs_offset2 \|\| !is_binary)
5132	{
5133	FREE_WCS_BUFFERS ();
5134	if (size2 > MAX_ALLOCA_SIZE)
5135	free (is_binary);
5136	else
5137	FREE_VAR (is_binary);
5138	return -`2`;
5139	}
5140	wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5141	mbs_offset2, is_binary);
5142	wcs_string2[wcs_size2] = L`'\0'`; / for a sentinel /
5143	if (size2 > MAX_ALLOCA_SIZE)
5144	free (is_binary);
5145	else
5146	FREE_VAR (is_binary);
5147	}
5148	#endif /* WCHAR */
5149
5150
5151	/ Loop through the string, looking for a place to start matching. /
5152	for (;;)
5153	{
5154	/ If a fastmap is supplied, skip quickly over characters that*
5155	cannot be the start of a match. If the pattern can match the
5156	null string, however, we don't need to skip characters; we want
5157	the first null string. /*
5158	if (fastmap && startpos < total_size && !bufp->can_be_null)
5159	{
5160	if (range > `0`) / Searching forwards. /
5161	{
5162	register const char *d;
5163	register int lim = `0`;
5164	int irange = range;
5165
5166	if (startpos < size1 && startpos + range >= size1)
5167	lim = range - (size1 - startpos);
5168
5169	d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5170
5171	/ Written out as an if-else to avoid testing `translate'*
5172	inside the loop. /*
5173	if (translate)
5174	while (range > lim
5175	&& !fastmap[(unsigned char)
5176	translate[(unsigned char) *d++]])
5177	range--;
5178	else
5179	while (range > lim && !fastmap[(unsigned char) *d++])
5180	range--;
5181
5182	startpos += irange - range;
5183	}
5184	else / Searching backwards. /
5185	{
5186	register CHAR_T c = (size1 == `0` \|\| startpos >= size1
5187	? string2[startpos - size1]
5188	: string1[startpos]);
5189
5190	if (!fastmap[(unsigned char) TRANSLATE (c)])
5191	goto advance;
5192	}
5193	}
5194
5195	/ If can't match the null string, and that's all we have left, fail. /
5196	if (range >= `0` && startpos == total_size && fastmap
5197	&& !bufp->can_be_null)
5198	{
5199	#ifdef WCHAR
5200	FREE_WCS_BUFFERS ();
5201	#endif
5202	return -`1`;
5203	}
5204
5205	#ifdef WCHAR
5206	val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5207	size2, startpos, regs, stop,
5208	wcs_string1, wcs_size1,
5209	wcs_string2, wcs_size2,
5210	mbs_offset1, mbs_offset2);
5211	#else /* BYTE */
5212	val = byte_re_match_2_internal (bufp, string1, size1, string2,
5213	size2, startpos, regs, stop);
5214	#endif /* BYTE */
5215
5216	#ifndef REGEX_MALLOC
5217	# ifdef C_ALLOCA
5218	alloca (`0`);
5219	# endif
5220	#endif
5221
5222	if (val >= `0`)
5223	{
5224	#ifdef WCHAR
5225	FREE_WCS_BUFFERS ();
5226	#endif
5227	return startpos;
5228	}
5229
5230	if (val == -`2`)
5231	{
5232	#ifdef WCHAR
5233	FREE_WCS_BUFFERS ();
5234	#endif
5235	return -`2`;
5236	}
5237
5238	advance:
5239	if (!range)
5240	break;
5241	else if (range > `0`)
5242	{
5243	range--;
5244	startpos++;
5245	}
5246	else
5247	{
5248	range++;
5249	startpos--;
5250	}
5251	}
5252	#ifdef WCHAR
5253	FREE_WCS_BUFFERS ();
5254	#endif
5255	return -`1`;
5256	}
5257
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' or `string2', into an offset within the original multibyte
   text.  The offset is looked up in `mbs_offset1' / `mbs_offset2'
   (see convert_mbs_to_wcs), indexed by the wide-character position of
   PTR; when the relevant table is NULL the lookup contributes 0.
   Offsets that fall in the second string are biased by `csize1'.  */
# define POINTER_TO_OFFSET(ptr)                                          \
  (FIRST_STRING_P (ptr)                                                  \
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))   \
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)   \
                 + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' or
   `string2', into an offset from the beginning of their (virtual)
   concatenation: positions in `string2' are biased by `size1'.  */
# define POINTER_TO_OFFSET(ptr)                  \
  (FIRST_STRING_P (ptr)                          \
   ? ((regoff_t) ((ptr) - string1))              \
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */
5276
/* Macros for dealing with the split strings in re_match_2.  */

/* Nonzero while the current scan limit `dend' is still the end of the
   portion of string1 being matched (`end_match_1').  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
5280
/* Call before fetching a character with `d'.  This switches over to
   string2 if necessary.  When `d' has reached `dend' and `dend' is
   already `end_match_2', there is nothing left to read and control
   jumps to the `fail' label, which must exist in the function that
   uses this macro.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }
5293
/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   Note that AT_STRINGS_BEG is also true for any D when `size2' is 0.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
5298
5299
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  In every case the
   character itself (not the pointer) is what gets classified, hence
   the dereferences below.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX.  An underscore is
   treated as word-constituent in addition to what iswalnum accepts.  */
# define WORDCHAR_P(d)                                                  \
  (iswalnum ((wint_t)((d) == end1 ? *string2                            \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0             \
   || ((d) == end1 ? *string2                                           \
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)                                                  \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)
#endif /* WCHAR */
5317
/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  The very beginning and very end of the
   strings always count as a boundary.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
5326
/* Free everything we malloc.

   Four variants exist, selected by MATCH_MAY_ALLOCATE and WCHAR:
   - with MATCH_MAY_ALLOCATE the failure stack and all register vectors
     are released;
   - with WCHAR the wide-character copies of the strings and their
     offset tables are released too, unless `cant_free_wcs_buf' is set;
   - with neither, the macro expands to a no-op expression.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */
5381
/* These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5391
5392	#else /* not INSIDE_RECURSION */
/* Matching routines.  */

#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.

   STRING (of length SIZE) is handed to the internal matcher as the
   second string, with a null, zero-length first string; matching
   starts at POS and may run up to SIZE.  The wide-character matcher
   is used when MBS_SUPPORT is compiled in and the current locale is
   multibyte (MB_CUR_MAX != 1), the byte matcher otherwise.  The
   internal matcher's result is returned unchanged; see re_match_2 for
   the meaning of the return value.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int result;
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                       pos, regs, size);
# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#  endif
# endif
  return result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
5423
5424	#endif /* not INSIDE_RECURSION */
5425
5426	#ifdef INSIDE_RECURSION
5427	static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
5428	UCHAR_T *end,
5429	PREFIX(register_info_type) *reg_info);
5430	static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
5431	UCHAR_T *end,
5432	PREFIX(register_info_type) *reg_info);
5433	static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
5434	UCHAR_T *end,
5435	PREFIX(register_info_type) *reg_info);
5436	static int PREFIX(bcmp_translate) (const CHAR_T s1, const* CHAR_T *s2,
5437	int len, char *translate);
5438	#else /* not INSIDE_RECURSION */
5439
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.

   The wide-character matcher is used when MBS_SUPPORT is compiled in
   and the current locale is multibyte (MB_CUR_MAX != 1); the byte
   matcher is used otherwise.  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int result;
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
                                      pos, regs, stop,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                       pos, regs, stop);

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
# endif
#endif
  return result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5479
5480	#endif /* not INSIDE_RECURSION */
5481
5482	#ifdef INSIDE_RECURSION
5483
5484	#ifdef WCHAR
static int count_mbs_length (int *, int);

/* Examine the first LENGTH bytes of the multibyte string that
   OFFSET_BUFFER describes and return how many wide characters they
   occupy.  OFFSET_BUFFER[i] is compared against LENGTH as the offset
   of the i-th wide character (see convert_mbs_to_wcs).

   Returns:
     -1  if LENGTH is negative, or if no entry of OFFSET_BUFFER below
         index LENGTH equals LENGTH;
      0  if OFFSET_BUFFER is NULL;
      i  such that OFFSET_BUFFER[i] == LENGTH otherwise.

   OFFSET_BUFFER[LENGTH] is read, so the buffer must hold at least
   LENGTH + 1 entries.  The search assumes the entries are increasing.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int upper, lower;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Set up upper with length (because for all i, offset_buffer[i] >= i).  */
  upper = length;
  lower = 0;

  /* Binary search for the index whose offset equals LENGTH.  */
  for (;;)
    {
      /* Written this way so the midpoint cannot overflow.  */
      int middle = lower + (upper - lower) / 2;

      /* The interval can no longer shrink: LENGTH is not present.  */
      if (middle == lower || middle == upper)
        break;

      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  return -1;
}
5528	#endif /* WCHAR */
5529
5530	/ This is a separate function so that we can force an alloca cleanup*
5531	afterwards. /*
5532	#ifdef WCHAR
5533	static int
5534	wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
5535	const char cstring1, int* csize1,
5536	const char cstring2, int* csize2,
5537	int pos,
5538	struct re_registers *regs,
5539	int stop,
/* string1 == string2 == NULL means string1/2, size1/2 and
   mbs_offset1/2 need setting up in this function.  */
/* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
5543	wchar_t string1, int* size1,
5544	wchar_t string2, int* size2,
/* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
5546	int mbs_offset1, int* *mbs_offset2)
5547	#else /* BYTE */
5548	static int
5549	byte_re_match_2_internal (struct re_pattern_buffer *bufp,
5550	const char string1, int* size1,
5551	const char string2, int* size2,
5552	int pos,
5553	struct re_registers regs, int* stop)
5554	#endif /* BYTE */
5555	{
5556	/ General temporaries. /
5557	int mcnt;
5558	UCHAR_T *p1;
5559	#ifdef WCHAR
5560	/ They hold whether each wchar_t is binary data or not. /
5561	char *is_binary = NULL;
5562	/ If true, we can't free string1/2, mbs_offset1/2. /
5563	int cant_free_wcs_buf = `1`;
5564	#endif /* WCHAR */
5565
5566	/ Just past the end of the corresponding string. /
5567	const CHAR_T end1, end2;
5568
5569	/ Pointers into string1 and string2, just past the last characters in*
5570	each to consider matching. /*
5571	const CHAR_T end_match_1, end_match_2;
5572
5573	/ Where we are in the data, and the end of the current string. /
5574	const CHAR_T d, dend;
5575
5576	/ Where we are in the pattern, and the end of the pattern. /
5577	#ifdef WCHAR
5578	UCHAR_T pattern, p;
5579	register UCHAR_T *pend;
5580	#else /* BYTE */
5581	UCHAR_T *p = bufp->buffer;
5582	register UCHAR_T *pend = p + bufp->used;
5583	#endif /* WCHAR */
5584
5585	/ Mark the opcode just after a start_memory, so we can test for an*
5586	empty subpattern when we get to the stop_memory. /*
5587	UCHAR_T *just_past_start_mem = `0`;
5588
5589	/ We use this to map every character in the string. /
5590	RE_TRANSLATE_TYPE translate = bufp->translate;
5591
5592	/ Failure point stack. Each place that can handle a failure further*
5593	down the line pushes a failure point on this stack. It consists of
5594	restart, regend, and reg_info for all registers corresponding to
5595	the subexpressions we're currently inside, plus the number of such
5596	registers, and, finally, two char 's. The first char * is where*
5597	to resume scanning the pattern; the second one is where to resume
5598	scanning the strings. If the latter is zero, the failure point is
5599	a ``dummy''; if a failure happens and the failure point is a dummy,
5600	it gets discarded and the next one is tried. /*
5601	#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5602	PREFIX(fail_stack_type) fail_stack;
5603	#endif
5604	#ifdef DEBUG
5605	static unsigned failure_id;
5606	unsigned nfailure_points_pushed = `0`, nfailure_points_popped = `0`;
5607	#endif
5608
5609	#ifdef REL_ALLOC
5610	/ This holds the pointer to the failure stack, when*
5611	it is allocated relocatably. /*
5612	fail_stack_elt_t *failure_stack_ptr;
5613	#endif
5614
5615	/ We fill all the registers internally, independent of what we*
5616	return, for use in backreferences. The number here includes
5617	an element for register zero. /*
5618	size_t num_regs = bufp->re_nsub + `1`;
5619
5620	/ The currently active registers. /
5621	active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5622	active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5623
5624	/ Information on the contents of registers. These are pointers into*
5625	the input strings; they record just what was matched (on this
5626	attempt) by a subexpression part of the pattern, that is, the
5627	regnum-th regstart pointer points to where in the pattern we began
5628	matching and the regnum-th regend points to right after where we
5629	stopped matching the regnum-th subexpression. (The zeroth register
5630	keeps track of what the whole pattern matches.) /*
5631	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5632	const CHAR_T regstart, regend;
5633	#endif
5634
5635	/ If a group that's operated upon by a repetition operator fails to*
5636	match anything, then the register for its start will need to be
5637	restored because it will have been set to wherever in the string we
5638	are when we last see its open-group operator. Similarly for a
5639	register's end. /*
5640	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5641	const CHAR_T old_regstart, old_regend;
5642	#endif
5643
5644	/ The is_active field of reg_info helps us keep track of which (possibly*
5645	nested) subexpressions we are currently in. The matched_something
5646	field of reg_info[reg_num] helps us tell whether or not we have
5647	matched any of the pattern so far this time through the reg_num-th
5648	subexpression. These two fields get reset each time through any
5649	loop their register is in. /*
5650	#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5651	PREFIX(register_info_type) *reg_info;
5652	#endif
5653
5654	/ The following record the register info as found in the above*
5655	variables when we find a match better than any we've seen before.
5656	This happens as we backtrack through the failure points, which in
5657	turn happens only if we have not yet matched the entire string. /*
5658	unsigned best_regs_set = false;
5659	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5660	const CHAR_T best_regstart, best_regend;
5661	#endif
5662
5663	/ Logically, this is `best_regend[0]'. But we don't want to have to*
5664	allocate space for that if we're not allocating space for anything
5665	else (see below). Also, we never need info about register 0 for
5666	any of the other register vectors, and it seems rather a kludge to
5667	treat `best_regend' differently than the rest. So we keep track of
5668	the end of the best match so far in a separate variable. We
5669	initialize this to NULL so that when we backtrack the first time
5670	and need to test it, it's not garbage. /*
5671	const CHAR_T *match_end = NULL;
5672
5673	/ This helps SET_REGS_MATCHED avoid doing redundant work. /
5674	int set_regs_matched_done = `0`;
5675
5676	/ Used when we pop values we don't care about. /
5677	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5678	const CHAR_T **reg_dummy;
5679	PREFIX(register_info_type) *reg_info_dummy;
5680	#endif
5681
5682	#ifdef DEBUG
5683	/ Counts the total number of registers pushed. /
5684	unsigned num_regs_pushed = `0`;
5685	#endif
5686
5687	DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5688
5689	INIT_FAIL_STACK ();
5690
5691	#ifdef MATCH_MAY_ALLOCATE
5692	/ Do not bother to initialize all the register variables if there are*
5693	no groups in the pattern, as it takes a fair amount of time. If
5694	there are groups, we include space for register 0 (the whole
5695	pattern), even though we never use it, since it simplifies the
5696	array indexing. We should fix this. /*
5697	if (bufp->re_nsub)
5698	{
5699	regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5700	regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5701	old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5702	old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5703	best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5704	best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5705	reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5706	reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5707	reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5708
5709	if (!(regstart && regend && old_regstart && old_regend && reg_info
5710	&& best_regstart && best_regend && reg_dummy && reg_info_dummy))
5711	{
5712	FREE_VARIABLES ();
5713	return -`2`;
5714	}
5715	}
5716	else
5717	{
5718	/ We must initialize all our variables to NULL, so that*
5719	`FREE_VARIABLES' doesn't try to free them. /*
5720	regstart = regend = old_regstart = old_regend = best_regstart
5721	= best_regend = reg_dummy = NULL;
5722	reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5723	}
5724	#endif /* MATCH_MAY_ALLOCATE */
5725
5726	/ The starting position is bogus. /
5727	#ifdef WCHAR
5728	if (pos < `0` \|\| pos > csize1 + csize2)
5729	#else /* BYTE */
5730	if (pos < `0` \|\| pos > size1 + size2)
5731	#endif
5732	{
5733	FREE_VARIABLES ();
5734	return -`1`;
5735	}
5736
5737	#ifdef WCHAR
5738	/ Allocate wchar_t array for string1 and string2 and*
5739	fill them with converted string. /*
5740	if (string1 == NULL && string2 == NULL)
5741	{
/* We need to set up the buffers here.  */
5743
5744	/ We must free wcs buffers in this function. /
5745	cant_free_wcs_buf = `0`;
5746
5747	if (csize1 != `0`)
5748	{
5749	string1 = REGEX_TALLOC (csize1 + `1`, CHAR_T);
5750	mbs_offset1 = REGEX_TALLOC (csize1 + `1`, int);
5751	is_binary = REGEX_TALLOC (csize1 + `1`, char);
5752	if (!string1 \|\| !mbs_offset1 \|\| !is_binary)
5753	{
5754	FREE_VAR (string1);
5755	FREE_VAR (mbs_offset1);
5756	FREE_VAR (is_binary);
5757	return -`2`;
5758	}
5759	}
5760	if (csize2 != `0`)
5761	{
5762	string2 = REGEX_TALLOC (csize2 + `1`, CHAR_T);
5763	mbs_offset2 = REGEX_TALLOC (csize2 + `1`, int);
5764	is_binary = REGEX_TALLOC (csize2 + `1`, char);
5765	if (!string2 \|\| !mbs_offset2 \|\| !is_binary)
5766	{
5767	FREE_VAR (string1);
5768	FREE_VAR (mbs_offset1);
5769	FREE_VAR (string2);
5770	FREE_VAR (mbs_offset2);
5771	FREE_VAR (is_binary);
5772	return -`2`;
5773	}
5774	size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5775	mbs_offset2, is_binary);
5776	string2[size2] = L`'\0'`; / for a sentinel /
5777	FREE_VAR (is_binary);
5778	}
5779	}
5780
/* We need to cast pattern to (wchar_t*), because we casted this compiled
   pattern to (char*) in regex_compile.  */
5783	p = pattern = (CHAR_T*)bufp->buffer;
5784	pend = (CHAR_T*)(bufp->buffer + bufp->used);
5785
5786	#endif /* WCHAR */
5787
5788	/ Initialize subexpression text positions to -1 to mark ones that no*
5789	start_memory/stop_memory has been seen for. Also initialize the
5790	register information struct. /*
5791	for (mcnt = `1`; (unsigned) mcnt < num_regs; mcnt++)
5792	{
5793	regstart[mcnt] = regend[mcnt]
5794	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5795
5796	REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5797	IS_ACTIVE (reg_info[mcnt]) = `0`;
5798	MATCHED_SOMETHING (reg_info[mcnt]) = `0`;
5799	EVER_MATCHED_SOMETHING (reg_info[mcnt]) = `0`;
5800	}
5801
5802	/ We move `string1' into `string2' if the latter's empty -- but not if*
5803	`string1' is null. /*
5804	if (size2 == `0` && string1 != NULL)
5805	{
5806	string2 = string1;
5807	size2 = size1;
5808	string1 = `0`;
5809	size1 = `0`;
5810	#ifdef WCHAR
5811	mbs_offset2 = mbs_offset1;
5812	csize2 = csize1;
5813	mbs_offset1 = NULL;
5814	csize1 = `0`;
5815	#endif
5816	}
5817	end1 = string1 + size1;
5818	end2 = string2 + size2;
5819
5820	/ Compute where to stop matching, within the two strings. /
5821	#ifdef WCHAR
5822	if (stop <= csize1)
5823	{
5824	mcnt = count_mbs_length(mbs_offset1, stop);
5825	end_match_1 = string1 + mcnt;
5826	end_match_2 = string2;
5827	}
5828	else
5829	{
5830	if (stop > csize1 + csize2)
5831	stop = csize1 + csize2;
5832	end_match_1 = end1;
5833	mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5834	end_match_2 = string2 + mcnt;
5835	}
5836	if (mcnt < `0`)
5837	{ / count_mbs_length return error. /
5838	FREE_VARIABLES ();
5839	return -`1`;
5840	}
5841	#else
5842	if (stop <= size1)
5843	{
5844	end_match_1 = string1 + stop;
5845	end_match_2 = string2;
5846	}
5847	else
5848	{
5849	end_match_1 = end1;
5850	end_match_2 = string2 + stop - size1;
5851	}
5852	#endif /* WCHAR */
5853
5854	/ `p' scans through the pattern as `d' scans through the data.*
5855	`dend' is the end of the input string that `d' points within. `d'
5856	is advanced into the following input string whenever necessary, but
5857	this happens before fetching; therefore, at the beginning of the
5858	loop, `d' can be pointing at the end of a string, but it cannot
5859	equal `string2'. /*
5860	#ifdef WCHAR
5861	if (size1 > `0` && pos <= csize1)
5862	{
5863	mcnt = count_mbs_length(mbs_offset1, pos);
5864	d = string1 + mcnt;
5865	dend = end_match_1;
5866	}
5867	else
5868	{
5869	mcnt = count_mbs_length(mbs_offset2, pos-csize1);
5870	d = string2 + mcnt;
5871	dend = end_match_2;
5872	}
5873
5874	if (mcnt < `0`)
5875	{ / count_mbs_length return error. /
5876	FREE_VARIABLES ();
5877	return -`1`;
5878	}
5879	#else
5880	if (size1 > `0` && pos <= size1)
5881	{
5882	d = string1 + pos;
5883	dend = end_match_1;
5884	}
5885	else
5886	{
5887	d = string2 + pos - size1;
5888	dend = end_match_2;
5889	}
5890	#endif /* WCHAR */
5891
5892	DEBUG_PRINT1 ("The compiled pattern is:\n");
5893	DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5894	DEBUG_PRINT1 ("The string to match is: `");
5895	DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5896	DEBUG_PRINT1 ("'\n");
5897
5898	/ This loops over pattern commands. It exits by returning from the*
5899	function if the match is complete, or it drops through if the match
5900	fails at this starting point in the input data. /*
5901	for (;;)
5902	{
5903	#ifdef _LIBC
5904	DEBUG_PRINT2 ("\n%p: ", p);
5905	#else
5906	DEBUG_PRINT2 ("\n0x%x: ", p);
5907	#endif
5908
5909	if (p == pend)
5910	{ / End of pattern means we might have succeeded. /
5911	DEBUG_PRINT1 ("end of pattern ... ");
5912
5913	/ If we haven't matched the entire string, and we want the*
5914	longest match, try backtracking. /*
5915	if (d != end_match_2)
5916	{
5917	/ 1 if this match ends in the same string (string1 or string2)*
5918	as the best previous match. /*
5919	boolean same_str_p;
5920
5921	/ 1 if this match is the best seen so far. /
5922	boolean best_match_p;
5923
5924	same_str_p = (FIRST_STRING_P (match_end)
5925	== MATCHING_IN_FIRST_STRING);
5926
5927	/ AIX compiler got confused when this was combined*
5928	with the previous declaration. /*
5929	if (same_str_p)
5930	best_match_p = d > match_end;
5931	else
5932	best_match_p = !MATCHING_IN_FIRST_STRING;
5933
5934	DEBUG_PRINT1 ("backtracking.\n");
5935
5936	if (!FAIL_STACK_EMPTY ())
5937	{ / More failure points to try. /
5938
5939	/ If exceeds best match so far, save it. /
5940	if (!best_regs_set \|\| best_match_p)
5941	{
5942	best_regs_set = true;
5943	match_end = d;
5944
5945	DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5946
5947	for (mcnt = `1`; (unsigned) mcnt < num_regs; mcnt++)
5948	{
5949	best_regstart[mcnt] = regstart[mcnt];
5950	best_regend[mcnt] = regend[mcnt];
5951	}
5952	}
5953	goto fail;
5954	}
5955
5956	/ If no failure points, don't restore garbage. And if*
5957	last match is real best match, don't restore second
5958	best one. /*
5959	else if (best_regs_set && !best_match_p)
5960	{
5961	restore_best_regs:
5962	/ Restore best match. It may happen that `dend ==*
5963	end_match_1' while the restored d is in string2.
5964	For example, the pattern `x.y.z' against the
5965	strings `x-' and `y-z-', if the two strings are
5966	not consecutive in memory. /*
5967	DEBUG_PRINT1 ("Restoring best registers.\n");
5968
5969	d = match_end;
5970	dend = ((d >= string1 && d <= end1)
5971	? end_match_1 : end_match_2);
5972
5973	for (mcnt = `1`; (unsigned) mcnt < num_regs; mcnt++)
5974	{
5975	regstart[mcnt] = best_regstart[mcnt];
5976	regend[mcnt] = best_regend[mcnt];
5977	}
5978	}
5979	} / d != end_match_2 /
5980
5981	succeed_label:
5982	DEBUG_PRINT1 ("Accepting match.\n");
5983	/ If caller wants register contents data back, do it. /
5984	if (regs && !bufp->no_sub)
5985	{
5986	/ Have the register data arrays been allocated? /
5987	if (bufp->regs_allocated == REGS_UNALLOCATED)
5988	{ / No. So allocate them with malloc. We need one*
5989	extra element beyond `num_regs' for the `-1' marker
5990	GNU code uses. /*
5991	regs->num_regs = MAX (RE_NREGS, num_regs + `1`);
5992	regs->start = TALLOC (regs->num_regs, regoff_t);
5993	regs->end = TALLOC (regs->num_regs, regoff_t);
5994	if (regs->start == NULL \|\| regs->end == NULL)
5995	{
5996	FREE_VARIABLES ();
5997	return -`2`;
5998	}
5999	bufp->regs_allocated = REGS_REALLOCATE;
6000	}
6001	else if (bufp->regs_allocated == REGS_REALLOCATE)
6002	{ / Yes. If we need more elements than were already*
6003	allocated, reallocate them. If we need fewer, just
6004	leave it alone. /*
6005	if (regs->num_regs < num_regs + `1`)
6006	{
6007	regs->num_regs = num_regs + `1`;
6008	RETALLOC (regs->start, regs->num_regs, regoff_t);
6009	RETALLOC (regs->end, regs->num_regs, regoff_t);
6010	if (regs->start == NULL \|\| regs->end == NULL)
6011	{
6012	FREE_VARIABLES ();
6013	return -`2`;
6014	}
6015	}
6016	}
6017	else
6018	{
6019	/ These braces fend off a "empty body in an else-statement"*
6020	warning under GCC when assert expands to nothing. /*
6021	assert (bufp->regs_allocated == REGS_FIXED);
6022	}
6023
6024	/ Convert the pointer data in `regstart' and `regend' to*
6025	indices. Register zero has to be set differently,
6026	since we haven't kept track of any info for it. /*
6027	if (regs->num_regs > `0`)
6028	{
6029	regs->start[`0`] = pos;
6030	#ifdef WCHAR
6031	if (MATCHING_IN_FIRST_STRING)
6032	regs->end[`0`] = mbs_offset1 != NULL ?
6033	mbs_offset1[d-string1] : `0`;
6034	else
6035	regs->end[`0`] = csize1 + (mbs_offset2 != NULL ?
6036	mbs_offset2[d-string2] : `0`);
6037	#else
6038	regs->end[`0`] = (MATCHING_IN_FIRST_STRING
6039	? ((regoff_t) (d - string1))
6040	: ((regoff_t) (d - string2 + size1)));
6041	#endif /* WCHAR */
6042	}
6043
6044	/ Go through the first `min (num_regs, regs->num_regs)'*
6045	registers, since that is all we initialized. /*
6046	for (mcnt = `1`; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6047	mcnt++)
6048	{
6049	if (REG_UNSET (regstart[mcnt]) \|\| REG_UNSET (regend[mcnt]))
6050	regs->start[mcnt] = regs->end[mcnt] = -`1`;
6051	else
6052	{
6053	regs->start[mcnt]
6054	= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6055	regs->end[mcnt]
6056	= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6057	}
6058	}
6059
6060	/ If the regs structure we return has more elements than*
6061	were in the pattern, set the extra elements to -1. If
6062	we (re)allocated the registers, this is the case,
6063	because we always allocate enough to have at least one
6064	-1 at the end. /*
6065	for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6066	regs->start[mcnt] = regs->end[mcnt] = -`1`;
6067	} / regs && !bufp->no_sub /
6068
6069	DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6070	nfailure_points_pushed, nfailure_points_popped,
6071	nfailure_points_pushed - nfailure_points_popped);
6072	DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6073
6074	#ifdef WCHAR
6075	if (MATCHING_IN_FIRST_STRING)
6076	mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : `0`;
6077	else
6078	mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : `0`) +
6079	csize1;
6080	mcnt -= pos;
6081	#else
6082	mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6083	? string1
6084	: string2 - size1);
6085	#endif /* WCHAR */
6086
6087	DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6088
6089	FREE_VARIABLES ();
6090	return mcnt;
6091	}
6092
6093	/ Otherwise match next pattern command. /
6094	switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6095	{
6096	/ Ignore these. Used to ignore the n of succeed_n's which*
6097	currently have n == 0. /*
6098	case no_op:
6099	DEBUG_PRINT1 ("EXECUTING no_op.\n");
6100	break;
6101
6102	case succeed:
6103	DEBUG_PRINT1 ("EXECUTING succeed.\n");
6104	goto succeed_label;
6105
6106	/ Match the next n pattern characters exactly. The following*
6107	byte in the pattern defines n, and the n bytes after that
6108	are the characters to match. /*
6109	case exactn:
6110	#ifdef MBS_SUPPORT
6111	case exactn_bin:
6112	#endif
6113	mcnt = *p++;
6114	DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6115
6116	/ This is written out as an if-else so we don't waste time*
6117	testing `translate' inside the loop. /*
6118	if (translate)
6119	{
6120	do
6121	{
6122	PREFETCH ();
6123	#ifdef WCHAR
6124	if (*d <= `0xff`)
6125	{
6126	if ((UCHAR_T) translate[(unsigned char) *d++]
6127	!= (UCHAR_T) *p++)
6128	goto fail;
6129	}
6130	else
6131	{
6132	if (d++ != (CHAR_T) p++)
6133	goto fail;
6134	}
6135	#else
6136	if ((UCHAR_T) translate[(unsigned char) *d++]
6137	!= (UCHAR_T) *p++)
6138	goto fail;
6139	#endif /* WCHAR */
6140	}
6141	while (--mcnt);
6142	}
6143	else
6144	{
6145	do
6146	{
6147	PREFETCH ();
6148	if (d++ != (CHAR_T) p++) goto fail;
6149	}
6150	while (--mcnt);
6151	}
6152	SET_REGS_MATCHED ();
6153	break;
6154
6155
6156	/ Match any character except possibly a newline or a null. /
6157	case anychar:
6158	DEBUG_PRINT1 ("EXECUTING anychar.\n");
6159
6160	PREFETCH ();
6161
6162	if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == `'\n'`)
6163	\|\| (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == `'\000'`))
6164	goto fail;
6165
6166	SET_REGS_MATCHED ();
6167	DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
6168	d++;
6169	break;
6170
6171
6172	case charset:
6173	case charset_not:
6174	{
6175	register UCHAR_T c;
6176	#ifdef WCHAR
6177	unsigned int i, char_class_length, coll_symbol_length,
6178	equiv_class_length, ranges_length, chars_length, length;
6179	CHAR_T workp, workp2, *charset_top;
6180	#define WORK_BUFFER_SIZE 128
6181	CHAR_T str_buf[WORK_BUFFER_SIZE];
6182	# ifdef _LIBC
6183	uint32_t nrules;
6184	# endif /* _LIBC */
6185	#endif /* WCHAR */
6186	boolean negate = (re_opcode_t) *(p - `1`) == charset_not;
6187
6188	DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
6189	PREFETCH ();
6190	c = TRANSLATE (d); /* The character to match. /
6191	#ifdef WCHAR
6192	# ifdef _LIBC
6193	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6194	# endif /* _LIBC */
6195	charset_top = p - `1`;
6196	char_class_length = *p++;
6197	coll_symbol_length = *p++;
6198	equiv_class_length = *p++;
6199	ranges_length = *p++;
6200	chars_length = *p++;
6201	/ p points charset[6], so the address of the next instruction*
6202	(charset[l+m+n+2o+k+p']) equals p[l+m+n+2o+p'],*
6203	where l=length of char_classes, m=length of collating_symbol,
6204	n=equivalence_class, o=length of char_range,
6205	p'=length of character. /*
6206	workp = p;
6207	/ Update p to indicate the next instruction. /
6208	p += char_class_length + coll_symbol_length+ equiv_class_length +
6209	`2`*ranges_length + chars_length;
6210
6211	/ match with char_class? /
6212	for (i = `0`; i < char_class_length ; i += CHAR_CLASS_SIZE)
6213	{
6214	wctype_t wctype;
6215	uintptr_t alignedp = ((uintptr_t)workp
6216	+ __alignof__(wctype_t) - `1`)
6217	& ~(uintptr_t)(__alignof__(wctype_t) - `1`);
6218	wctype = ((wctype_t)alignedp);
6219	workp += CHAR_CLASS_SIZE;
6220	# ifdef _LIBC
6221	if (__iswctype((wint_t)c, wctype))
6222	goto char_set_matched;
6223	# else
6224	if (iswctype((wint_t)c, wctype))
6225	goto char_set_matched;
6226	# endif
6227	}
6228
6229	/ match with collating_symbol? /
6230	# ifdef _LIBC
6231	if (nrules != `0`)
6232	{
6233	const unsigned char extra = (const* unsigned char *)
6234	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6235
6236	for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6237	workp++)
6238	{
6239	int32_t *wextra;
6240	wextra = (int32_t)(extra + workp++);
6241	for (i = `0`; i < *wextra; ++i)
6242	if (TRANSLATE(d[i]) != wextra[`1` + i])
6243	break;
6244
6245	if (i == *wextra)
6246	{
6247	/ Update d, however d will be incremented at*
6248	char_set_matched:, we decrement d here. /*
6249	d += i - `1`;
6250	goto char_set_matched;
6251	}
6252	}
6253	}
6254	else / (nrules == 0) /
6255	# endif
6256	/ If we can't look up collation data, we use wcscoll*
6257	instead. /*
6258	{
6259	for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6260	{
6261	const CHAR_T backup_d = d, backup_dend = dend;
6262	# ifdef _LIBC
6263	length = __wcslen (workp);
6264	# else
6265	length = wcslen (workp);
6266	# endif
6267
6268	/ If wcscoll(the collating symbol, whole string) > 0,*
6269	any substring of the string never match with the
6270	collating symbol. /*
6271	# ifdef _LIBC
6272	if (__wcscoll (workp, d) > `0`)
6273	# else
6274	if (wcscoll (workp, d) > `0`)
6275	# endif
6276	{
6277	workp += length + `1`;
6278	continue;
6279	}
6280
6281	/ First, we compare the collating symbol with*
6282	the first character of the string.
6283	If it don't match, we add the next character to
6284	the compare buffer in turn. /*
6285	for (i = `0` ; i < WORK_BUFFER_SIZE-`1` ; i++, d++)
6286	{
6287	int match;
6288	if (d == dend)
6289	{
6290	if (dend == end_match_2)
6291	break;
6292	d = string2;
6293	dend = end_match_2;
6294	}
6295
6296	/ add next character to the compare buffer. /
6297	str_buf[i] = TRANSLATE(*d);
6298	str_buf[i+`1`] = `'\0'`;
6299
6300	# ifdef _LIBC
6301	match = __wcscoll (workp, str_buf);
6302	# else
6303	match = wcscoll (workp, str_buf);
6304	# endif
6305	if (match == `0`)
6306	goto char_set_matched;
6307
6308	if (match < `0`)
6309	/ (str_buf > workp) indicate (str_buf + X > workp),*
6310	because for all X (str_buf + X > str_buf).
6311	So we don't need continue this loop. /*
6312	break;
6313
6314	/ Otherwise(str_buf < workp),*
6315	(str_buf+next_character) may equals (workp).
6316	So we continue this loop. /*
6317	}
6318	/ not matched /
6319	d = backup_d;
6320	dend = backup_dend;
6321	workp += length + `1`;
6322	}
6323	}
6324	/ match with equivalence_class? /
6325	# ifdef _LIBC
6326	if (nrules != `0`)
6327	{
6328	const CHAR_T backup_d = d, backup_dend = dend;
6329	/ Try to match the equivalence class against*
6330	those known to the collate implementation. /*
6331	const int32_t *table;
6332	const int32_t *weights;
6333	const int32_t *extra;
6334	const int32_t *indirect;
6335	int32_t idx, idx2;
6336	wint_t *cp;
6337	size_t len;
6338
6339	/ This #include defines a local function! /
6340	# include <locale/weightwc.h>
6341
6342	table = (const int32_t *)
6343	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6344	weights = (const wint_t *)
6345	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6346	extra = (const wint_t *)
6347	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6348	indirect = (const int32_t *)
6349	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6350
6351	/ Write 1 collating element to str_buf, and*
6352	get its index. /*
6353	idx2 = `0`;
6354
6355	for (i = `0` ; idx2 == `0` && i < WORK_BUFFER_SIZE - `1`; i++)
6356	{
6357	cp = (wint_t*)str_buf;
6358	if (d == dend)
6359	{
6360	if (dend == end_match_2)
6361	break;
6362	d = string2;
6363	dend = end_match_2;
6364	}
6365	str_buf[i] = TRANSLATE(*(d+i));
6366	str_buf[i+`1`] = `'\0'`; / sentinel /
6367	idx2 = findidx ((const wint_t**)&cp);
6368	}
6369
6370	/ Update d, however d will be incremented at*
6371	char_set_matched:, we decrement d here. /*
6372	d = backup_d + ((wchar_t)cp - (wchar_t**)str_buf - `1`);
6373	if (d >= dend)
6374	{
6375	if (dend == end_match_2)
6376	d = dend;
6377	else
6378	{
6379	d = string2;
6380	dend = end_match_2;
6381	}
6382	}
6383
6384	len = weights[idx2];
6385
6386	for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6387	workp++)
6388	{
6389	idx = (int32_t)*workp;
6390	/ We already checked idx != 0 in regex_compile. /
6391
6392	if (idx2 != `0` && len == weights[idx])
6393	{
6394	int cnt = `0`;
6395	while (cnt < len && (weights[idx + `1` + cnt]
6396	== weights[idx2 + `1` + cnt]))
6397	++cnt;
6398
6399	if (cnt == len)
6400	goto char_set_matched;
6401	}
6402	}
6403	/ not matched /
6404	d = backup_d;
6405	dend = backup_dend;
6406	}
6407	else / (nrules == 0) /
6408	# endif
6409	/ If we can't look up collation data, we use wcscoll*
6410	instead. /*
6411	{
6412	for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6413	{
6414	const CHAR_T backup_d = d, backup_dend = dend;
6415	# ifdef _LIBC
6416	length = __wcslen (workp);
6417	# else
6418	length = wcslen (workp);
6419	# endif
6420
6421	/ If wcscoll(the collating symbol, whole string) > 0,*
6422	any substring of the string never match with the
6423	collating symbol. /*
6424	# ifdef _LIBC
6425	if (__wcscoll (workp, d) > `0`)
6426	# else
6427	if (wcscoll (workp, d) > `0`)
6428	# endif
6429	{
6430	workp += length + `1`;
6431	break;
6432	}
6433
6434	/ First, we compare the equivalence class with*
6435	the first character of the string.
6436	If it don't match, we add the next character to
6437	the compare buffer in turn. /*
6438	for (i = `0` ; i < WORK_BUFFER_SIZE - `1` ; i++, d++)
6439	{
6440	int match;
6441	if (d == dend)
6442	{
6443	if (dend == end_match_2)
6444	break;
6445	d = string2;
6446	dend = end_match_2;
6447	}
6448
6449	/ add next character to the compare buffer. /
6450	str_buf[i] = TRANSLATE(*d);
6451	str_buf[i+`1`] = `'\0'`;
6452
6453	# ifdef _LIBC
6454	match = __wcscoll (workp, str_buf);
6455	# else
6456	match = wcscoll (workp, str_buf);
6457	# endif
6458
6459	if (match == `0`)
6460	goto char_set_matched;
6461
6462	if (match < `0`)
6463	/ (str_buf > workp) indicate (str_buf + X > workp),*
6464	because for all X (str_buf + X > str_buf).
6465	So we don't need continue this loop. /*
6466	break;
6467
6468	/ Otherwise(str_buf < workp),*
6469	(str_buf+next_character) may equals (workp).
6470	So we continue this loop. /*
6471	}
6472	/ not matched /
6473	d = backup_d;
6474	dend = backup_dend;
6475	workp += length + `1`;
6476	}
6477	}
6478
6479	/ match with char_range? /
6480	# ifdef _LIBC
6481	if (nrules != `0`)
6482	{
6483	uint32_t collseqval;
6484	const char collseq = (const* char *)
6485	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6486
6487	collseqval = collseq_table_lookup (collseq, c);
6488
6489	for (; workp < p - chars_length ;)
6490	{
6491	uint32_t start_val, end_val;
6492
6493	/ We already compute the collation sequence value*
6494	of the characters (or collating symbols). /*
6495	start_val = (uint32_t) workp++; /* range_start /
6496	end_val = (uint32_t) workp++; /* range_end /
6497
6498	if (start_val <= collseqval && collseqval <= end_val)
6499	goto char_set_matched;
6500	}
6501	}
6502	else
6503	# endif
6504	{
6505	/ We set range_start_char at str_buf[0], range_end_char*
6506	at str_buf[4], and compared char at str_buf[2]. /*
6507	str_buf[`1`] = `0`;
6508	str_buf[`2`] = c;
6509	str_buf[`3`] = `0`;
6510	str_buf[`5`] = `0`;
6511	for (; workp < p - chars_length ;)
6512	{
6513	wchar_t range_start_char, range_end_char;
6514
6515	/ match if (range_start_char <= c <= range_end_char). /
6516
6517	/ If range_start(or end) < 0, we assume -range_start(end)*
6518	is the offset of the collating symbol which is specified
6519	as the character of the range start(end). /*
6520
6521	/ range_start /
6522	if (*workp < `0`)
6523	range_start_char = charset_top - (*workp++);
6524	else
6525	{
6526	str_buf[`0`] = *workp++;
6527	range_start_char = str_buf;
6528	}
6529
6530	/ range_end /
6531	if (*workp < `0`)
6532	range_end_char = charset_top - (*workp++);
6533	else
6534	{
6535	str_buf[`4`] = *workp++;
6536	range_end_char = str_buf + `4`;
6537	}
6538
6539	# ifdef _LIBC
6540	if (__wcscoll (range_start_char, str_buf+`2`) <= `0`
6541	&& __wcscoll (str_buf+`2`, range_end_char) <= `0`)
6542	# else
6543	if (wcscoll (range_start_char, str_buf+`2`) <= `0`
6544	&& wcscoll (str_buf+`2`, range_end_char) <= `0`)
6545	# endif
6546	goto char_set_matched;
6547	}
6548	}
6549
6550	/ match with char? /
6551	for (; workp < p ; workp++)
6552	if (c == *workp)
6553	goto char_set_matched;
6554
6555	negate = !negate;
6556
6557	char_set_matched:
6558	if (negate) goto fail;
6559	#else
6560	/ Cast to `unsigned' instead of `unsigned char' in case the*
6561	bit list is a full 32 bytes long. /*
6562	if (c < (unsigned) (p BYTEWIDTH)
6563	&& p[`1` + c / BYTEWIDTH] & (`1` << (c % BYTEWIDTH)))
6564	negate = !negate;
6565
6566	p += `1` + *p;
6567
6568	if (!negate) goto fail;
6569	#undef WORK_BUFFER_SIZE
6570	#endif /* WCHAR */
6571	SET_REGS_MATCHED ();
6572	d++;
6573	break;
6574	}
6575
6576
6577	/ The beginning of a group is represented by start_memory.*
6578	The arguments are the register number in the next byte, and the
6579	number of groups inner to this one in the next. The text
6580	matched within the group is recorded (in the internal
6581	registers data structure) under the register number. /*
6582	case start_memory:
6583	DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6584	(long int) p, (long* int) p[`1`]);
6585
6586	/ Find out if this group can match the empty string. /
6587	p1 = p; / To send to group_match_null_string_p. /
6588
6589	if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6590	REG_MATCH_NULL_STRING_P (reg_info[*p])
6591	= PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6592
6593	/ Save the position in the string where we were the last time*
6594	we were at this open-group operator in case the group is
6595	operated upon by a repetition operator, e.g., with `(a)b'
6596	against `ab'; then we want to ignore where we are now in
6597	the string in case this attempt to match fails. /*
6598	old_regstart[p] = REG_MATCH_NULL_STRING_P (reg_info[p])
6599	? REG_UNSET (regstart[p]) ? d : regstart[p]
6600	: regstart[*p];
6601	DEBUG_PRINT2 (" old_regstart: %d\n",
6602	POINTER_TO_OFFSET (old_regstart[*p]));
6603
6604	regstart[*p] = d;
6605	DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6606
6607	IS_ACTIVE (reg_info[*p]) = `1`;
6608	MATCHED_SOMETHING (reg_info[*p]) = `0`;
6609
6610	/ Clear this whenever we change the register activity status. /
6611	set_regs_matched_done = `0`;
6612
6613	/ This is the new highest active register. /
6614	highest_active_reg = *p;
6615
6616	/ If nothing was active before, this is the new lowest active*
6617	register. /*
6618	if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6619	lowest_active_reg = *p;
6620
6621	/ Move past the register number and inner group count. /
6622	p += `2`;
6623	just_past_start_mem = p;
6624
6625	break;
6626
6627
6628	/ The stop_memory opcode represents the end of a group. Its*
6629	arguments are the same as start_memory's: the register
6630	number, and the number of inner groups. /*
6631	case stop_memory:
6632	DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6633	(long int) p, (long* int) p[`1`]);
6634
6635	/ We need to save the string position the last time we were at*
6636	this close-group operator in case the group is operated
6637	upon by a repetition operator, e.g., with `((a)(b))'*
6638	against `aba'; then we want to ignore where we are now in
6639	the string in case this attempt to match fails. /*
6640	old_regend[p] = REG_MATCH_NULL_STRING_P (reg_info[p])
6641	? REG_UNSET (regend[p]) ? d : regend[p]
6642	: regend[*p];
6643	DEBUG_PRINT2 (" old_regend: %d\n",
6644	POINTER_TO_OFFSET (old_regend[*p]));
6645
6646	regend[*p] = d;
6647	DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6648
6649	/ This register isn't active anymore. /
6650	IS_ACTIVE (reg_info[*p]) = `0`;
6651
6652	/ Clear this whenever we change the register activity status. /
6653	set_regs_matched_done = `0`;
6654
6655	/ If this was the only register active, nothing is active*
6656	anymore. /*
6657	if (lowest_active_reg == highest_active_reg)
6658	{
6659	lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6660	highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6661	}
6662	else
6663	{ / We must scan for the new highest active register, since*
6664	it isn't necessarily one less than now: consider
6665	(a(b)c(d(e)f)g). When group 3 ends, after the f), the
6666	new highest active register is 1. /*
6667	UCHAR_T r = *p - `1`;
6668	while (r > `0` && !IS_ACTIVE (reg_info[r]))
6669	r--;
6670
6671	/ If we end up at register zero, that means that we saved*
6672	the registers as the result of an `on_failure_jump', not
6673	a `start_memory', and we jumped to past the innermost
6674	`stop_memory'. For example, in ((.)) we save*
6675	registers 1 and 2 as a result of the , but when we pop*
6676	back to the second ), we are at the stop_memory 1.
6677	Thus, nothing is active. /*
6678	if (r == `0`)
6679	{
6680	lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6681	highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6682	}
6683	else
6684	highest_active_reg = r;
6685	}
6686
6687	/ If just failed to match something this time around with a*
6688	group that's operated on by a repetition operator, try to
6689	force exit from the ``loop'', and restore the register
6690	information for this group that we had before trying this
6691	last match. /*
6692	if ((!MATCHED_SOMETHING (reg_info[*p])
6693	\|\| just_past_start_mem == p - `1`)
6694	&& (p + `2`) < pend)
6695	{
6696	boolean is_a_jump_n = false;
6697
6698	p1 = p + `2`;
6699	mcnt = `0`;
6700	switch ((re_opcode_t) *p1++)
6701	{
6702	case jump_n:
6703	is_a_jump_n = true;
6704	/ Fall through. /
6705	case pop_failure_jump:
6706	case maybe_pop_jump:
6707	case jump:
6708	case dummy_failure_jump:
6709	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6710	if (is_a_jump_n)
6711	p1 += OFFSET_ADDRESS_SIZE;
6712	break;
6713
6714	default:
6715	/ do nothing / ;
6716	}
6717	p1 += mcnt;
6718
6719	/ If the next operation is a jump backwards in the pattern*
6720	to an on_failure_jump right before the start_memory
6721	corresponding to this stop_memory, exit from the loop
6722	by forcing a failure after pushing on the stack the
6723	on_failure_jump's jump in the pattern, and d. /*
6724	if (mcnt < `0` && (re_opcode_t) *p1 == on_failure_jump
6725	&& (re_opcode_t) p1[`1`+OFFSET_ADDRESS_SIZE] == start_memory
6726	&& p1[`2`+OFFSET_ADDRESS_SIZE] == *p)
6727	{
6728	/ If this group ever matched anything, then restore*
6729	what its registers were before trying this last
6730	failed match, e.g., with `(a)b' against `ab' for
6731	regstart[1], and, e.g., with `((a)(b))'*
6732	against `aba' for regend[3].
6733
6734	Also restore the registers for inner groups for,
6735	e.g., `((a)(b))' against `aba' (register 3 would*
6736	otherwise get trashed). /*
6737
6738	if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6739	{
6740	unsigned r;
6741
6742	EVER_MATCHED_SOMETHING (reg_info[*p]) = `0`;
6743
6744	/ Restore this and inner groups' (if any) registers. /
6745	for (r = p; r < (unsigned) p + (unsigned) *(p + `1`);
6746	r++)
6747	{
6748	regstart[r] = old_regstart[r];
6749
6750	/ xx why this test? /
6751	if (old_regend[r] >= regstart[r])
6752	regend[r] = old_regend[r];
6753	}
6754	}
6755	p1++;
6756	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6757	PUSH_FAILURE_POINT (p1 + mcnt, d, -`2`);
6758
6759	goto fail;
6760	}
6761	}
6762
6763	/ Move past the register number and the inner group count. /
6764	p += `2`;
6765	break;
6766
6767
6768	/ \<digit> has been turned into a `duplicate' command which is*
6769	followed by the numeric value of <digit> as the register number. /*
6770	case duplicate:
6771	{
6772	register const CHAR_T d2, dend2;
6773	int regno = p++; /* Get which register to match against. /
6774	DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6775
6776	/ Can't back reference a group which we've never matched. /
6777	if (REG_UNSET (regstart[regno]) \|\| REG_UNSET (regend[regno]))
6778	goto fail;
6779
6780	/ Where in input to try to start matching. /
6781	d2 = regstart[regno];
6782
6783	/ Where to stop matching; if both the place to start and*
6784	the place to stop matching are in the same string, then
6785	set to the place to stop, otherwise, for now have to use
6786	the end of the first string. /*
6787
6788	dend2 = ((FIRST_STRING_P (regstart[regno])
6789	== FIRST_STRING_P (regend[regno]))
6790	? regend[regno] : end_match_1);
6791	for (;;)
6792	{
6793	/ If necessary, advance to next segment in register*
6794	contents. /*
6795	while (d2 == dend2)
6796	{
6797	if (dend2 == end_match_2) break;
6798	if (dend2 == regend[regno]) break;
6799
6800	/ End of string1 => advance to string2. /
6801	d2 = string2;
6802	dend2 = regend[regno];
6803	}
6804	/ At end of register contents => success /
6805	if (d2 == dend2) break;
6806
6807	/ If necessary, advance to next segment in data. /
6808	PREFETCH ();
6809
6810	/ How many characters left in this segment to match. /
6811	mcnt = dend - d;
6812
6813	/ Want how many consecutive characters we can match in*
6814	one shot, so, if necessary, adjust the count. /*
6815	if (mcnt > dend2 - d2)
6816	mcnt = dend2 - d2;
6817
6818	/ Compare that many; failure if mismatch, else move*
6819	past them. /*
6820	if (translate
6821	? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6822	: memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
6823	goto fail;
6824	d += mcnt, d2 += mcnt;
6825
6826	/ Do this because we've match some characters. /
6827	SET_REGS_MATCHED ();
6828	}
6829	}
6830	break;
6831
6832
6833	/ begline matches the empty string at the beginning of the string*
6834	(unless `not_bol' is set in `bufp'), and, if
6835	`newline_anchor' is set, after newlines. /*
6836	case begline:
6837	DEBUG_PRINT1 ("EXECUTING begline.\n");
6838
6839	if (AT_STRINGS_BEG (d))
6840	{
6841	if (!bufp->not_bol) break;
6842	}
6843	else if (d[-`1`] == `'\n'` && bufp->newline_anchor)
6844	{
6845	break;
6846	}
6847	/ In all other cases, we fail. /
6848	goto fail;
6849
6850
6851	/ endline is the dual of begline. /
6852	case endline:
6853	DEBUG_PRINT1 ("EXECUTING endline.\n");
6854
6855	if (AT_STRINGS_END (d))
6856	{
6857	if (!bufp->not_eol) break;
6858	}
6859
6860	/ We have to ``prefetch'' the next character. /
6861	else if ((d == end1 ? string2 : d) == `'\n'`
6862	&& bufp->newline_anchor)
6863	{
6864	break;
6865	}
6866	goto fail;
6867
6868
6869	/ Match at the very beginning of the data. /
6870	case begbuf:
6871	DEBUG_PRINT1 ("EXECUTING begbuf.\n");
6872	if (AT_STRINGS_BEG (d))
6873	break;
6874	goto fail;
6875
6876
6877	/ Match at the very end of the data. /
6878	case endbuf:
6879	DEBUG_PRINT1 ("EXECUTING endbuf.\n");
6880	if (AT_STRINGS_END (d))
6881	break;
6882	goto fail;
6883
6884
6885	/ on_failure_keep_string_jump is used to optimize `.\n'. It
6886	pushes NULL as the value for the string on the stack. Then
6887	`pop_failure_point' will keep the current value for the
6888	string, instead of restoring it. To see why, consider
6889	matching `foo\nbar' against `.\n'. The .* matches the foo;*
6890	then the . fails against the \n. But the next thing we want
6891	to do is match the \n against the \n; if we restored the
6892	string value, we would be back at the foo.
6893
6894	Because this is used only in specific cases, we don't need to
6895	check all the things that `on_failure_jump' does, to make
6896	sure the right things get saved on the stack. Hence we don't
6897	share its code. The only reason to push anything on the
6898	stack at all is that otherwise we would have to change
6899	`anychar's code to do something besides goto fail in this
6900	case; that seems worse than this. /*
6901	case on_failure_keep_string_jump:
6902	DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
6903
6904	EXTRACT_NUMBER_AND_INCR (mcnt, p);
6905	#ifdef _LIBC
6906	DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
6907	#else
6908	DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
6909	#endif
6910
6911	PUSH_FAILURE_POINT (p + mcnt, NULL, -`2`);
6912	break;
6913
6914
6915	/ Uses of on_failure_jump:*
6916
6917	Each alternative starts with an on_failure_jump that points
6918	to the beginning of the next alternative. Each alternative
6919	except the last ends with a jump that in effect jumps past
6920	the rest of the alternatives. (They really jump to the
6921	ending jump of the following alternative, because tensioning
6922	these jumps is a hassle.)
6923
6924	Repeats start with an on_failure_jump that points past both
6925	the repetition text and either the following jump or
6926	pop_failure_jump back to this on_failure_jump. /*
6927	case on_failure_jump:
6928	on_failure:
6929	DEBUG_PRINT1 ("EXECUTING on_failure_jump");
6930
6931	EXTRACT_NUMBER_AND_INCR (mcnt, p);
6932	#ifdef _LIBC
6933	DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
6934	#else
6935	DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
6936	#endif
6937
6938	/ If this on_failure_jump comes right before a group (i.e.,*
6939	the original applied to a group), save the information*
6940	for that group and all inner ones, so that if we fail back
6941	to this point, the group's information will be correct.
6942	For example, in $a$\1, we need the preceding group,
6943	and in $zz\(a$b\)\2, we need the inner group. /*
6944
6945	/ We can't use `p' to check ahead because we push*
6946	a failure point to `p + mcnt' after we do this. /*
6947	p1 = p;
6948
6949	/ We need to skip no_op's before we look for the*
6950	start_memory in case this on_failure_jump is happening as
6951	the result of a completed succeed_n, as in $a$\{1,3\}b\1
6952	against aba. /*
6953	while (p1 < pend && (re_opcode_t) *p1 == no_op)
6954	p1++;
6955
6956	if (p1 < pend && (re_opcode_t) *p1 == start_memory)
6957	{
6958	/ We have a new highest active register now. This will*
6959	get reset at the start_memory we are about to get to,
6960	but we will have saved all the registers relevant to
6961	this repetition op, as described above. /*
6962	highest_active_reg = (p1 + `1`) + (p1 + `2`);
6963	if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6964	lowest_active_reg = *(p1 + `1`);
6965	}
6966
6967	DEBUG_PRINT1 (":\n");
6968	PUSH_FAILURE_POINT (p + mcnt, d, -`2`);
6969	break;
6970
6971
6972	/ A smart repeat ends with `maybe_pop_jump'.*
6973	We change it to either `pop_failure_jump' or `jump'. /*
6974	case maybe_pop_jump:
6975	EXTRACT_NUMBER_AND_INCR (mcnt, p);
6976	DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
6977	{
6978	register UCHAR_T *p2 = p;
6979
6980	/ Compare the beginning of the repeat with what in the*
6981	pattern follows its end. If we can establish that there
6982	is nothing that they would both match, i.e., that we
6983	would have to backtrack because of (as in, e.g., `aa')*
6984	then we can change to pop_failure_jump, because we'll
6985	never have to backtrack.
6986
6987	This is not true in the case of alternatives: in
6988	`(a\|ab)' we do need to backtrack to the `ab' alternative*
6989	(e.g., if the string was `ab'). But instead of trying to
6990	detect that here, the alternative has put on a dummy
6991	failure point which is what we will end up popping. /*
6992
6993	/ Skip over open/close-group commands.*
6994	If what follows this loop is a ...+ construct,
6995	look at what begins its body, since we will have to
6996	match at least one of that. /*
6997	while (`1`)
6998	{
6999	if (p2 + `2` < pend
7000	&& ((re_opcode_t) *p2 == stop_memory
7001	\|\| (re_opcode_t) *p2 == start_memory))
7002	p2 += `3`;
7003	else if (p2 + `2` + `2` * OFFSET_ADDRESS_SIZE < pend
7004	&& (re_opcode_t) *p2 == dummy_failure_jump)
7005	p2 += `2` + `2` * OFFSET_ADDRESS_SIZE;
7006	else
7007	break;
7008	}
7009
7010	p1 = p + mcnt;
7011	/ p1[0] ... p1[2] are the `on_failure_jump' corresponding*
7012	to the `maybe_finalize_jump' of this case. Examine what
7013	follows. /*
7014
7015	/ If we're at the end of the pattern, we can change. /
7016	if (p2 == pend)
7017	{
7018	/ Consider what happens when matching ":$.$"
7019	against ":/". I don't really understand this code
7020	yet. /*
7021	p[-(`1`+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7022	pop_failure_jump;
7023	DEBUG_PRINT1
7024	(" End of pattern: change to `pop_failure_jump'.\n");
7025	}
7026
7027	else if ((re_opcode_t) *p2 == exactn
7028	#ifdef MBS_SUPPORT
7029	\|\| (re_opcode_t) *p2 == exactn_bin
7030	#endif
7031	\|\| (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7032	{
7033	register UCHAR_T c
7034	= *p2 == (UCHAR_T) endline ? `'\n'` : p2[`2`];
7035
7036	if (((re_opcode_t) p1[`1`+OFFSET_ADDRESS_SIZE] == exactn
7037	#ifdef MBS_SUPPORT
7038	\|\| (re_opcode_t) p1[`1`+OFFSET_ADDRESS_SIZE] == exactn_bin
7039	#endif
7040	) && p1[`3`+OFFSET_ADDRESS_SIZE] != c)
7041	{
7042	p[-(`1`+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7043	pop_failure_jump;
7044	#ifdef WCHAR
7045	DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
7046	(wint_t) c,
7047	(wint_t) p1[`3`+OFFSET_ADDRESS_SIZE]);
7048	#else
7049	DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
7050	(char) c,
7051	(char) p1[`3`+OFFSET_ADDRESS_SIZE]);
7052	#endif
7053	}
7054
7055	#ifndef WCHAR
7056	else if ((re_opcode_t) p1[`3`] == charset
7057	\|\| (re_opcode_t) p1[`3`] == charset_not)
7058	{
7059	int negate = (re_opcode_t) p1[`3`] == charset_not;
7060
7061	if (c < (unsigned) (p1[`4`] * BYTEWIDTH)
7062	&& p1[`5` + c / BYTEWIDTH] & (`1` << (c % BYTEWIDTH)))
7063	negate = !negate;
7064
7065	/ `negate' is equal to 1 if c would match, which means*
7066	that we can't change to pop_failure_jump. /*
7067	if (!negate)
7068	{
7069	p[-`3`] = (unsigned char) pop_failure_jump;
7070	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7071	}
7072	}
7073	#endif /* not WCHAR */
7074	}
7075	#ifndef WCHAR
7076	else if ((re_opcode_t) *p2 == charset)
7077	{
7078	/ We win if the first character of the loop is not part*
7079	of the charset. /*
7080	if ((re_opcode_t) p1[`3`] == exactn
7081	&& ! ((int) p2[`1`] * BYTEWIDTH > (int) p1[`5`]
7082	&& (p2[`2` + p1[`5`] / BYTEWIDTH]
7083	& (`1` << (p1[`5`] % BYTEWIDTH)))))
7084	{
7085	p[-`3`] = (unsigned char) pop_failure_jump;
7086	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7087	}
7088
7089	else if ((re_opcode_t) p1[`3`] == charset_not)
7090	{
7091	int idx;
7092	/ We win if the charset_not inside the loop*
7093	lists every character listed in the charset after. /*
7094	for (idx = `0`; idx < (int) p2[`1`]; idx++)
7095	if (! (p2[`2` + idx] == `0`
7096	\|\| (idx < (int) p1[`4`]
7097	&& ((p2[`2` + idx] & ~ p1[`5` + idx]) == `0`))))
7098	break;
7099
7100	if (idx == p2[`1`])
7101	{
7102	p[-`3`] = (unsigned char) pop_failure_jump;
7103	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7104	}
7105	}
7106	else if ((re_opcode_t) p1[`3`] == charset)
7107	{
7108	int idx;
7109	/ We win if the charset inside the loop*
7110	has no overlap with the one after the loop. /*
7111	for (idx = `0`;
7112	idx < (int) p2[`1`] && idx < (int) p1[`4`];
7113	idx++)
7114	if ((p2[`2` + idx] & p1[`5` + idx]) != `0`)
7115	break;
7116
7117	if (idx == p2[`1`] \|\| idx == p1[`4`])
7118	{
7119	p[-`3`] = (unsigned char) pop_failure_jump;
7120	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7121	}
7122	}
7123	}
7124	#endif /* not WCHAR */
7125	}
7126	p -= OFFSET_ADDRESS_SIZE; / Point at relative address again. /
7127	if ((re_opcode_t) p[-`1`] != pop_failure_jump)
7128	{
7129	p[-`1`] = (UCHAR_T) jump;
7130	DEBUG_PRINT1 (" Match => jump.\n");
7131	goto unconditional_jump;
7132	}
7133	/ Fall through. /
7134
7135
7136	/ The end of a simple repeat has a pop_failure_jump back to*
7137	its matching on_failure_jump, where the latter will push a
7138	failure point. The pop_failure_jump takes off failure
7139	points put on by this pop_failure_jump's matching
7140	on_failure_jump; we got through the pattern to here from the
7141	matching on_failure_jump, so didn't fail. /*
7142	case pop_failure_jump:
7143	{
7144	/ We need to pass separate storage for the lowest and*
7145	highest registers, even though we don't care about the
7146	actual values. Otherwise, we will restore only one
7147	register from the stack, since lowest will == highest in
7148	`pop_failure_point'. /*
7149	active_reg_t dummy_low_reg, dummy_high_reg;
7150	UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
7151	const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
7152
7153	DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7154	POP_FAILURE_POINT (sdummy, pdummy,
7155	dummy_low_reg, dummy_high_reg,
7156	reg_dummy, reg_dummy, reg_info_dummy);
7157	}
7158	/ Fall through. /
7159
7160	unconditional_jump:
7161	#ifdef _LIBC
7162	DEBUG_PRINT2 ("\n%p: ", p);
7163	#else
7164	DEBUG_PRINT2 ("\n0x%x: ", p);
7165	#endif
7166	/ Note fall through. /
7167
7168	/ Unconditionally jump (without popping any failure points). /
7169	case jump:
7170	EXTRACT_NUMBER_AND_INCR (mcnt, p); / Get the amount to jump. /
7171	DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7172	p += mcnt; / Do the jump. /
7173	#ifdef _LIBC
7174	DEBUG_PRINT2 ("(to %p).\n", p);
7175	#else
7176	DEBUG_PRINT2 ("(to 0x%x).\n", p);
7177	#endif
7178	break;
7179
7180
7181	/ We need this opcode so we can detect where alternatives end*
7182	in `group_match_null_string_p' et al. /*
7183	case jump_past_alt:
7184	DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7185	goto unconditional_jump;
7186
7187
7188	/ Normally, the on_failure_jump pushes a failure point, which*
7189	then gets popped at pop_failure_jump. We will end up at
7190	pop_failure_jump, also, and with a pattern of, say, `a+', we
7191	are skipping over the on_failure_jump, so we have to push
7192	something meaningless for pop_failure_jump to pop. /*
7193	case dummy_failure_jump:
7194	DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7195	/ It doesn't matter what we push for the string here. What*
7196	the code at `fail' tests is the value for the pattern. /*
7197	PUSH_FAILURE_POINT (NULL, NULL, -`2`);
7198	goto unconditional_jump;
7199
7200
7201	/ At the end of an alternative, we need to push a dummy failure*
7202	point in case we are followed by a `pop_failure_jump', because
7203	we don't want the failure point for the alternative to be
7204	popped. For example, matching `(a\|ab)' against `aab'*
7205	requires that we match the `ab' alternative. /*
7206	case push_dummy_failure:
7207	DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7208	/ See comments just above at `dummy_failure_jump' about the*
7209	two zeroes. /*
7210	PUSH_FAILURE_POINT (NULL, NULL, -`2`);
7211	break;
7212
7213	/ Have to succeed matching what follows at least n times.*
7214	After that, handle like `on_failure_jump'. /*
7215	case succeed_n:
7216	EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7217	DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7218
7219	assert (mcnt >= `0`);
7220	/ Originally, this is how many times we HAVE to succeed. /
7221	if (mcnt > `0`)
7222	{
7223	mcnt--;
7224	p += OFFSET_ADDRESS_SIZE;
7225	STORE_NUMBER_AND_INCR (p, mcnt);
7226	#ifdef _LIBC
7227	DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7228	, mcnt);
7229	#else
7230	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7231	, mcnt);
7232	#endif
7233	}
7234	else if (mcnt == `0`)
7235	{
7236	#ifdef _LIBC
7237	DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
7238	p + OFFSET_ADDRESS_SIZE);
7239	#else
7240	DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
7241	p + OFFSET_ADDRESS_SIZE);
7242	#endif /* _LIBC */
7243
7244	#ifdef WCHAR
7245	p[`1`] = (UCHAR_T) no_op;
7246	#else
7247	p[`2`] = (UCHAR_T) no_op;
7248	p[`3`] = (UCHAR_T) no_op;
7249	#endif /* WCHAR */
7250	goto on_failure;
7251	}
7252	break;
7253
7254	case jump_n:
7255	EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7256	DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7257
7258	/ Originally, this is how many times we CAN jump. /
7259	if (mcnt)
7260	{
7261	mcnt--;
7262	STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7263
7264	#ifdef _LIBC
7265	DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7266	mcnt);
7267	#else
7268	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7269	mcnt);
7270	#endif /* _LIBC */
7271	goto unconditional_jump;
7272	}
7273	/ If don't have to jump any more, skip over the rest of command. /
7274	else
7275	p += `2` * OFFSET_ADDRESS_SIZE;
7276	break;
7277
7278	case set_number_at:
7279	{
7280	DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7281
7282	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7283	p1 = p + mcnt;
7284	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7285	#ifdef _LIBC
7286	DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
7287	#else
7288	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
7289	#endif
7290	STORE_NUMBER (p1, mcnt);
7291	break;
7292	}
7293
7294	#if 0
7295	/ The DEC Alpha C compiler 3.x generates incorrect code for the*
7296	test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
7297	AT_WORD_BOUNDARY, so this code is disabled. Expanding the
7298	macro and introducing temporary variables works around the bug. /*
7299
7300	case wordbound:
7301	DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7302	if (AT_WORD_BOUNDARY (d))
7303	break;
7304	goto fail;
7305
7306	case notwordbound:
7307	DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7308	if (AT_WORD_BOUNDARY (d))
7309	goto fail;
7310	break;
7311	#else
7312	case wordbound:
7313	{
7314	boolean prevchar, thischar;
7315
7316	DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7317	if (AT_STRINGS_BEG (d) \|\| AT_STRINGS_END (d))
7318	break;
7319
7320	prevchar = WORDCHAR_P (d - `1`);
7321	thischar = WORDCHAR_P (d);
7322	if (prevchar != thischar)
7323	break;
7324	goto fail;
7325	}
7326
7327	case notwordbound:
7328	{
7329	boolean prevchar, thischar;
7330
7331	DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7332	if (AT_STRINGS_BEG (d) \|\| AT_STRINGS_END (d))
7333	goto fail;
7334
7335	prevchar = WORDCHAR_P (d - `1`);
7336	thischar = WORDCHAR_P (d);
7337	if (prevchar != thischar)
7338	goto fail;
7339	break;
7340	}
7341	#endif
7342
7343	case wordbeg:
7344	DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7345	if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7346	&& (AT_STRINGS_BEG (d) \|\| !WORDCHAR_P (d - `1`)))
7347	break;
7348	goto fail;
7349
7350	case wordend:
7351	DEBUG_PRINT1 ("EXECUTING wordend.\n");
7352	if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - `1`)
7353	&& (AT_STRINGS_END (d) \|\| !WORDCHAR_P (d)))
7354	break;
7355	goto fail;
7356
7357	#ifdef emacs
7358	case before_dot:
7359	DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7360	if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7361	goto fail;
7362	break;
7363
7364	case at_dot:
7365	DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7366	if (PTR_CHAR_POS ((unsigned char *) d) != point)
7367	goto fail;
7368	break;
7369
7370	case after_dot:
7371	DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7372	if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7373	goto fail;
7374	break;
7375
7376	case syntaxspec:
7377	DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7378	mcnt = *p++;
7379	goto matchsyntax;
7380
7381	case wordchar:
7382	DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7383	mcnt = (int) Sword;
7384	matchsyntax:
7385	PREFETCH ();
7386	/ Can't use d++ here; SYNTAX may be an unsafe macro. /*
7387	d++;
7388	if (SYNTAX (d[-`1`]) != (enum syntaxcode) mcnt)
7389	goto fail;
7390	SET_REGS_MATCHED ();
7391	break;
7392
7393	case notsyntaxspec:
7394	DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7395	mcnt = *p++;
7396	goto matchnotsyntax;
7397
7398	case notwordchar:
7399	DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7400	mcnt = (int) Sword;
7401	matchnotsyntax:
7402	PREFETCH ();
7403	/ Can't use d++ here; SYNTAX may be an unsafe macro. /*
7404	d++;
7405	if (SYNTAX (d[-`1`]) == (enum syntaxcode) mcnt)
7406	goto fail;
7407	SET_REGS_MATCHED ();
7408	break;
7409
7410	#else /* not emacs */
7411	case wordchar:
7412	DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7413	PREFETCH ();
7414	if (!WORDCHAR_P (d))
7415	goto fail;
7416	SET_REGS_MATCHED ();
7417	d++;
7418	break;
7419
7420	case notwordchar:
7421	DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7422	PREFETCH ();
7423	if (WORDCHAR_P (d))
7424	goto fail;
7425	SET_REGS_MATCHED ();
7426	d++;
7427	break;
7428	#endif /* not emacs */
7429
7430	default:
7431	abort ();
7432	}
7433	continue; / Successfully executed one pattern command; keep going. /
7434
7435
7436	/ We goto here if a matching operation fails. /
7437	fail:
7438	if (!FAIL_STACK_EMPTY ())
7439	{ / A restart point is known. Restore to that state. /
7440	DEBUG_PRINT1 ("\nFAIL:\n");
7441	POP_FAILURE_POINT (d, p,
7442	lowest_active_reg, highest_active_reg,
7443	regstart, regend, reg_info);
7444
7445	/ If this failure point is a dummy, try the next one. /
7446	if (!p)
7447	goto fail;
7448
7449	/ If we failed to the end of the pattern, don't examine p. /*
7450	assert (p <= pend);
7451	if (p < pend)
7452	{
7453	boolean is_a_jump_n = false;
7454
7455	/ If failed to a backwards jump that's part of a repetition*
7456	loop, need to pop this failure point and use the next one. /*
7457	switch ((re_opcode_t) *p)
7458	{
7459	case jump_n:
7460	is_a_jump_n = true;
7461	/ Fall through. /
7462	case maybe_pop_jump:
7463	case pop_failure_jump:
7464	case jump:
7465	p1 = p + `1`;
7466	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7467	p1 += mcnt;
7468
7469	if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7470	\|\| (!is_a_jump_n
7471	&& (re_opcode_t) *p1 == on_failure_jump))
7472	goto fail;
7473	break;
7474	default:
7475	/ do nothing / ;
7476	}
7477	}
7478
7479	if (d >= string1 && d <= end1)
7480	dend = end_match_1;
7481	}
7482	else
7483	break; / Matching at this starting point really fails. /
7484	} / for (;;) /
7485
7486	if (best_regs_set)
7487	goto restore_best_regs;
7488
7489	FREE_VARIABLES ();
7490
7491	return -`1`; / Failure to match. /
7492	} / re_match_2 /
7493
/* Subroutine definitions for re_match_2.  */
7495
7496
7497	/ We are passed P pointing to a register number after a start_memory.*
7498
7499	Return true if the pattern up to the corresponding stop_memory can
7500	match the empty string, and false otherwise.
7501
7502	If we find the matching stop_memory, sets P to point to one past its number.
7503	Otherwise, sets P to an undefined byte less than or equal to END.
7504
7505	We don't handle duplicates properly (yet). /*
7506
7507	static boolean
7508	PREFIX(group_match_null_string_p) (UCHAR_T *p, UCHAR_T end,
7509	PREFIX(register_info_type) *reg_info)
7510	{
7511	int mcnt;
7512	/ Point to after the args to the start_memory. /
7513	UCHAR_T p1 = p + `2`;
7514
7515	while (p1 < end)
7516	{
7517	/ Skip over opcodes that can match nothing, and return true or*
7518	false, as appropriate, when we get to one that can't, or to the
7519	matching stop_memory. /*
7520
7521	switch ((re_opcode_t) *p1)
7522	{
7523	/ Could be either a loop or a series of alternatives. /
7524	case on_failure_jump:
7525	p1++;
7526	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7527
7528	/ If the next operation is not a jump backwards in the*
7529	pattern. /*
7530
7531	if (mcnt >= `0`)
7532	{
7533	/ Go through the on_failure_jumps of the alternatives,*
7534	seeing if any of the alternatives cannot match nothing.
7535	The last alternative starts with only a jump,
7536	whereas the rest start with on_failure_jump and end
7537	with a jump, e.g., here is the pattern for `a\|b\|c':
7538
7539	/on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7540	/on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7541	/exactn/1/c
7542
7543	So, we have to first go through the first (n-1)
7544	alternatives and then deal with the last one separately. /*
7545
7546
7547	/ Deal with the first (n-1) alternatives, which start*
7548	with an on_failure_jump (see above) that jumps to right
7549	past a jump_past_alt. /*
7550
7551	while ((re_opcode_t) p1[mcnt-(`1`+OFFSET_ADDRESS_SIZE)] ==
7552	jump_past_alt)
7553	{
7554	/ `mcnt' holds how many bytes long the alternative*
7555	is, including the ending `jump_past_alt' and
7556	its number. /*
7557
7558	if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7559	(`1` + OFFSET_ADDRESS_SIZE),
7560	reg_info))
7561	return false;
7562
7563	/ Move to right after this alternative, including the*
7564	jump_past_alt. /*
7565	p1 += mcnt;
7566
7567	/ Break if it's the beginning of an n-th alternative*
7568	that doesn't begin with an on_failure_jump. /*
7569	if ((re_opcode_t) *p1 != on_failure_jump)
7570	break;
7571
7572	/ Still have to check that it's not an n-th*
7573	alternative that starts with an on_failure_jump. /*
7574	p1++;
7575	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7576	if ((re_opcode_t) p1[mcnt-(`1`+OFFSET_ADDRESS_SIZE)] !=
7577	jump_past_alt)
7578	{
7579	/ Get to the beginning of the n-th alternative. /
7580	p1 -= `1` + OFFSET_ADDRESS_SIZE;
7581	break;
7582	}
7583	}
7584
7585	/ Deal with the last alternative: go back and get number*
7586	of the `jump_past_alt' just before it. `mcnt' contains
7587	the length of the alternative. /*
7588	EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7589
7590	if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7591	return false;
7592
7593	p1 += mcnt; / Get past the n-th alternative. /
7594	} / if mcnt > 0 /
7595	break;
7596
7597
7598	case stop_memory:
7599	assert (p1[`1`] == **p);
7600	*p = p1 + `2`;
7601	return true;
7602
7603
7604	default:
7605	if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7606	return false;
7607	}
7608	} / while p1 < end /
7609
7610	return false;
7611	} / group_match_null_string_p /
7612
7613
7614	/ Similar to group_match_null_string_p, but doesn't deal with alternatives:*
7615	It expects P to be the first byte of a single alternative and END one
7616	byte past the last. The alternative can contain groups. /*
7617
7618	static boolean
7619	PREFIX(alt_match_null_string_p) (UCHAR_T p, UCHAR_T end,
7620	PREFIX(register_info_type) *reg_info)
7621	{
7622	int mcnt;
7623	UCHAR_T *p1 = p;
7624
7625	while (p1 < end)
7626	{
7627	/ Skip over opcodes that can match nothing, and break when we get*
7628	to one that can't. /*
7629
7630	switch ((re_opcode_t) *p1)
7631	{
7632	/ It's a loop. /
7633	case on_failure_jump:
7634	p1++;
7635	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7636	p1 += mcnt;
7637	break;
7638
7639	default:
7640	if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7641	return false;
7642	}
7643	} / while p1 < end /
7644
7645	return true;
7646	} / alt_match_null_string_p /
7647
7648
7649	/ Deals with the ops common to group_match_null_string_p and*
7650	alt_match_null_string_p.
7651
7652	Sets P to one after the op and its arguments, if any. /*
7653
7654	static boolean
7655	PREFIX(common_op_match_null_string_p) (UCHAR_T *p, UCHAR_T end,
7656	PREFIX(register_info_type) *reg_info)
7657	{
7658	int mcnt;
7659	boolean ret;
7660	int reg_no;
7661	UCHAR_T p1 = p;
7662
7663	switch ((re_opcode_t) *p1++)
7664	{
7665	case no_op:
7666	case begline:
7667	case endline:
7668	case begbuf:
7669	case endbuf:
7670	case wordbeg:
7671	case wordend:
7672	case wordbound:
7673	case notwordbound:
7674	#ifdef emacs
7675	case before_dot:
7676	case at_dot:
7677	case after_dot:
7678	#endif
7679	break;
7680
7681	case start_memory:
7682	reg_no = *p1;
7683	assert (reg_no > `0` && reg_no <= MAX_REGNUM);
7684	ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7685
7686	/ Have to set this here in case we're checking a group which*
7687	contains a group and a back reference to it. /*
7688
7689	if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7690	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7691
7692	if (!ret)
7693	return false;
7694	break;
7695
7696	/ If this is an optimized succeed_n for zero times, make the jump. /
7697	case jump:
7698	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7699	if (mcnt >= `0`)
7700	p1 += mcnt;
7701	else
7702	return false;
7703	break;
7704
7705	case succeed_n:
7706	/ Get to the number of times to succeed. /
7707	p1 += OFFSET_ADDRESS_SIZE;
7708	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7709
7710	if (mcnt == `0`)
7711	{
7712	p1 -= `2` * OFFSET_ADDRESS_SIZE;
7713	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7714	p1 += mcnt;
7715	}
7716	else
7717	return false;
7718	break;
7719
7720	case duplicate:
7721	if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7722	return false;
7723	break;
7724
7725	case set_number_at:
7726	p1 += `2` * OFFSET_ADDRESS_SIZE;
7727	return false;
7728
7729	default:
7730	/ All other opcodes mean we cannot match the empty string. /
7731	return false;
7732	}
7733
7734	*p = p1;
7735	return true;
7736	} / common_op_match_null_string_p /
7737
7738
7739	/ Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN*
7740	bytes; nonzero otherwise. /*
7741
7742	static int
7743	PREFIX(bcmp_translate) (const CHAR_T s1, const* CHAR_T s2, register* int len,
7744	RE_TRANSLATE_TYPE translate)
7745	{
7746	register const UCHAR_T p1 = (const* UCHAR_T *) s1;
7747	register const UCHAR_T p2 = (const* UCHAR_T *) s2;
7748	while (len)
7749	{
7750	#ifdef WCHAR
7751	if (((p1<=`0xff`)?translate[p1++]:*p1++)
7752	!= ((p2<=`0xff`)?translate[p2++]:*p2++))
7753	return `1`;
7754	#else /* BYTE */
7755	if (translate[p1++] != translate[p2++]) return `1`;
7756	#endif /* WCHAR */
7757	len--;
7758	}
7759	return `0`;
7760	}
7761
7762
7763	#else /* not INSIDE_RECURSION */
7764
/* Entry points for GNU code.  */
7766
7767	/ re_compile_pattern is the GNU regular expression compiler: it*
7768	compiles PATTERN (of length SIZE) and puts the result in BUFP.
7769	Returns 0 if the pattern was valid, otherwise an error string.
7770
7771	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7772	are set in BUFP on entry.
7773
7774	We call regex_compile to do the actual compilation. /*
7775
7776	const char *
7777	re_compile_pattern (const char *pattern, size_t length,
7778	struct re_pattern_buffer *bufp)
7779	{
7780	reg_errcode_t ret;
7781
7782	/ GNU code is written to assume at least RE_NREGS registers will be set*
7783	(and at least one extra will be -1). /*
7784	bufp->regs_allocated = REGS_UNALLOCATED;
7785
7786	/ And GNU code determines whether or not to get register information*
7787	by passing null for the REGS argument to re_match, etc., not by
7788	setting no_sub. /*
7789	bufp->no_sub = `0`;
7790
7791	/ Match anchors at newline. /
7792	bufp->newline_anchor = `1`;
7793
7794	# ifdef MBS_SUPPORT
7795	if (MB_CUR_MAX != `1`)
7796	ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
7797	else
7798	# endif
7799	ret = byte_regex_compile (pattern, size: length, re_syntax_options, bufp);
7800
7801	if (!ret)
7802	return NULL;
7803	return gettext (re_error_msgid[(int) ret]);
7804	}
7805	#ifdef _LIBC
7806	weak_alias (__re_compile_pattern, re_compile_pattern)
7807	#endif
7808
/* Entry points compatible with 4.2 BSD regex library.  We don't define
   them unless specifically requested.  */
7811
7812	#if defined _REGEX_RE_COMP \|\| defined _LIBC
7813
/* BSD has one and only one pattern buffer.  */
7815	static struct re_pattern_buffer re_comp_buf;
7816
7817	char *
7818	#ifdef _LIBC
7819	/ Make these definitions weak in libc, so POSIX programs can redefine*
7820	these names if they don't use our functions, and still use
7821	regcomp/regexec below without link errors. /*
7822	weak_function
7823	#endif
7824	re_comp (const char *s)
7825	{
7826	reg_errcode_t ret;
7827
7828	if (!s)
7829	{
7830	if (!re_comp_buf.buffer)
7831	return (char *) gettext ("No previous regular expression");
7832	return `0`;
7833	}
7834
7835	if (!re_comp_buf.buffer)
7836	{
7837	re_comp_buf.buffer = (unsigned char *) malloc (size: `200`);
7838	if (re_comp_buf.buffer == NULL)
7839	return (char ) gettext (re_error_msgid[(int*) REG_ESPACE]);
7840	re_comp_buf.allocated = `200`;
7841
7842	re_comp_buf.fastmap = (char *) malloc (size: `1` << BYTEWIDTH);
7843	if (re_comp_buf.fastmap == NULL)
7844	return (char ) gettext (re_error_msgid[(int*) REG_ESPACE]);
7845	}
7846
7847	/ Since `re_exec' always passes NULL for the `regs' argument, we*
7848	don't need to initialize the pattern buffer fields which affect it. /*
7849
7850	/ Match anchors at newlines. /
7851	re_comp_buf.newline_anchor = `1`;
7852
7853	# ifdef MBS_SUPPORT
7854	if (MB_CUR_MAX != `1`)
7855	ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7856	else
7857	# endif
7858	ret = byte_regex_compile (pattern: s, size: strlen (s: s), re_syntax_options, bufp: &re_comp_buf);
7859
7860	if (!ret)
7861	return NULL;
7862
7863	/ Yes, we're discarding `const' here if !HAVE_LIBINTL. /
7864	return (char ) gettext (re_error_msgid[(int*) ret]);
7865	}
7866
7867
7868	int
7869	#ifdef _LIBC
7870	weak_function
7871	#endif
7872	re_exec (const char *s)
7873	{
7874	const int len = strlen (s: s);
7875	return
7876	`0` <= re_search (bufp: &re_comp_buf, string: s, size: len, startpos: `0`, range: len, regs: (struct re_registers *) `0`);
7877	}
7878
7879	#endif /* _REGEX_RE_COMP */
7880
/* POSIX.2 functions.  Don't define these for Emacs.  */
7882
7883	#ifndef emacs
7884
7885	/ regcomp takes a regular expression as a string and compiles it.*
7886
7887	PREG is a regex_t . We do not expect any fields to be initialized,*
7888	since POSIX says we shouldn't. Thus, we set
7889
7890	`buffer' to the compiled pattern;
7891	`used' to the length of the compiled pattern;
7892	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7893	REG_EXTENDED bit in CFLAGS is set; otherwise, to
7894	RE_SYNTAX_POSIX_BASIC;
7895	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
7896	`fastmap' to an allocated space for the fastmap;
7897	`fastmap_accurate' to zero;
7898	`re_nsub' to the number of subexpressions in PATTERN.
7899
7900	PATTERN is the address of the pattern string.
7901
7902	CFLAGS is a series of bits which affect compilation.
7903
7904	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7905	use POSIX basic syntax.
7906
7907	If REG_NEWLINE is set, then . and [^...] don't match newline.
7908	Also, regexec will try a match beginning after every newline.
7909
7910	If REG_ICASE is set, then we considers upper- and lowercase
7911	versions of letters to be equivalent when matching.
7912
7913	If REG_NOSUB is set, then when PREG is passed to regexec, that
7914	routine will report only success or failure, and nothing about the
7915	registers.
7916
7917	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
7918	the return codes and their meanings.) /*
7919
7920	int
7921	regcomp (regex_t preg, const* char pattern, int* cflags)
7922	{
7923	reg_errcode_t ret;
7924	reg_syntax_t syntax
7925	= (cflags & REG_EXTENDED) ?
7926	RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7927
7928	/ regex_compile will allocate the space for the compiled pattern. /
7929	preg->buffer = `0`;
7930	preg->allocated = `0`;
7931	preg->used = `0`;
7932
7933	/ Try to allocate space for the fastmap. /
7934	preg->fastmap = (char *) malloc (size: `1` << BYTEWIDTH);
7935
7936	if (cflags & REG_ICASE)
7937	{
7938	int i;
7939
7940	preg->translate
7941	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
7942	* sizeof (*(RE_TRANSLATE_TYPE)`0`));
7943	if (preg->translate == NULL)
7944	return (int) REG_ESPACE;
7945
7946	/ Map uppercase characters to corresponding lowercase ones. /
7947	for (i = `0`; i < CHAR_SET_SIZE; i++)
7948	preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
7949	}
7950	else
7951	preg->translate = NULL;
7952
7953	/ If REG_NEWLINE is set, newlines are treated differently. /
7954	if (cflags & REG_NEWLINE)
7955	{ / REG_NEWLINE implies neither . nor [^...] match newline. /
7956	syntax &= ~RE_DOT_NEWLINE;
7957	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
7958	/ It also changes the matching behavior. /
7959	preg->newline_anchor = `1`;
7960	}
7961	else
7962	preg->newline_anchor = `0`;
7963
7964	preg->no_sub = !!(cflags & REG_NOSUB);
7965
7966	/ POSIX says a null character in the pattern terminates it, so we*
7967	can use strlen here in compiling the pattern. /*
7968	# ifdef MBS_SUPPORT
7969	if (MB_CUR_MAX != `1`)
7970	ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
7971	else
7972	# endif
7973	ret = byte_regex_compile (pattern, size: strlen (s: pattern), syntax, bufp: preg);
7974
7975	/ POSIX doesn't distinguish between an unmatched open-group and an*
7976	unmatched close-group: both are REG_EPAREN. /*
7977	if (ret == REG_ERPAREN) ret = REG_EPAREN;
7978
7979	if (ret == REG_NOERROR && preg->fastmap)
7980	{
7981	/ Compute the fastmap now, since regexec cannot modify the pattern*
7982	buffer. /*
7983	if (re_compile_fastmap (bufp: preg) == -`2`)
7984	{
7985	/ Some error occurred while computing the fastmap, just forget*
7986	about it. /*
7987	free (ptr: preg->fastmap);
7988	preg->fastmap = NULL;
7989	}
7990	}
7991
7992	return (int) ret;
7993	}
7994	#ifdef _LIBC
7995	weak_alias (__regcomp, regcomp)
7996	#endif
7997
7998
7999	/ regexec searches for a given pattern, specified by PREG, in the*
8000	string STRING.
8001
8002	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8003	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
8004	least NMATCH elements, and we set them to the offsets of the
8005	corresponding matched substrings.
8006
8007	EFLAGS specifies `execution flags' which affect matching: if
8008	REG_NOTBOL is set, then ^ does not match at the beginning of the
8009	string; if REG_NOTEOL is set, then $ does not match at the end.
8010
8011	We return 0 if we find a match and REG_NOMATCH if not. /*
8012
8013	int
8014	regexec (const regex_t preg, const* char *string, size_t nmatch,
8015	regmatch_t pmatch[], int eflags)
8016	{
8017	int ret;
8018	struct re_registers regs;
8019	regex_t private_preg;
8020	int len = strlen (s: string);
8021	boolean want_reg_info = !preg->no_sub && nmatch > `0`;
8022
8023	private_preg = *preg;
8024
8025	private_preg.not_bol = !!(eflags & REG_NOTBOL);
8026	private_preg.not_eol = !!(eflags & REG_NOTEOL);
8027
8028	/ The user has told us exactly how many registers to return*
8029	information about, via `nmatch'. We have to pass that on to the
8030	matching routines. /*
8031	private_preg.regs_allocated = REGS_FIXED;
8032
8033	if (want_reg_info)
8034	{
8035	regs.num_regs = nmatch;
8036	regs.start = TALLOC (nmatch * `2`, regoff_t);
8037	if (regs.start == NULL)
8038	return (int) REG_NOMATCH;
8039	regs.end = regs.start + nmatch;
8040	}
8041
8042	/ Perform the searching operation. /
8043	ret = re_search (bufp: &private_preg, string, size: len,
8044	/ start: / startpos: `0`, / range: / range: len,
8045	regs: want_reg_info ? &regs : (struct re_registers *) `0`);
8046
8047	/ Copy the register information to the POSIX structure. /
8048	if (want_reg_info)
8049	{
8050	if (ret >= `0`)
8051	{
8052	unsigned r;
8053
8054	for (r = `0`; r < nmatch; r++)
8055	{
8056	pmatch[r].rm_so = regs.start[r];
8057	pmatch[r].rm_eo = regs.end[r];
8058	}
8059	}
8060
8061	/ If we needed the temporary register info, free the space now. /
8062	free (ptr: regs.start);
8063	}
8064
8065	/ We want zero return to mean success, unlike `re_search'. /
8066	return ret >= `0` ? (int) REG_NOERROR : (int) REG_NOMATCH;
8067	}
8068	#ifdef _LIBC
8069	weak_alias (__regexec, regexec)
8070	#endif
8071
8072
8073	/ Returns a message corresponding to an error code, ERRCODE, returned*
8074	from either regcomp or regexec. We don't use PREG here. /*
8075
8076	size_t
8077	regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
8078	char *errbuf, size_t errbuf_size)
8079	{
8080	const char *msg;
8081	size_t msg_size;
8082
8083	if (errcode < `0`
8084	\|\| errcode >= (int) (sizeof (re_error_msgid)
8085	/ sizeof (re_error_msgid[`0`])))
8086	/ Only error codes returned by the rest of the code should be passed*
8087	to this routine. If we are given anything else, or if other regex
8088	code generates an invalid error code, then the program has a bug.
8089	Dump core so we can fix it. /*
8090	abort ();
8091
8092	msg = gettext (re_error_msgid[errcode]);
8093
8094	msg_size = strlen (s: msg) + `1`; / Includes the null. /
8095
8096	if (errbuf_size != `0`)
8097	{
8098	if (msg_size > errbuf_size)
8099	{
8100	#if defined HAVE_MEMPCPY \|\| defined _LIBC
8101	((char* *) mempcpy (errbuf, msg, errbuf_size - `1`)) = `'\0'`;
8102	#else
8103	(void) memcpy (dest: errbuf, src: msg, n: errbuf_size - `1`);
8104	errbuf[errbuf_size - `1`] = `0`;
8105	#endif
8106	}
8107	else
8108	(void) memcpy (dest: errbuf, src: msg, n: msg_size);
8109	}
8110
8111	return msg_size;
8112	}
8113	#ifdef _LIBC
8114	weak_alias (__regerror, regerror)
8115	#endif
8116
8117
8118	/ Free dynamically allocated space used by PREG. /
8119
8120	void
8121	regfree (regex_t *preg)
8122	{
8123	free (ptr: preg->buffer);
8124	preg->buffer = NULL;
8125
8126	preg->allocated = `0`;
8127	preg->used = `0`;
8128
8129	free (ptr: preg->fastmap);
8130	preg->fastmap = NULL;
8131	preg->fastmap_accurate = `0`;
8132
8133	free (ptr: preg->translate);
8134	preg->translate = NULL;
8135	}
8136	#ifdef _LIBC
8137	weak_alias (__regfree, regfree)
8138	#endif
8139
8140	#endif /* not emacs */
8141
8142	#endif /* not INSIDE_RECURSION */
8143
8144
/* Drop the helper macros used above.  NOTE(review): presumably this is
   so that a later pass over this file, with the other of BYTE/WCHAR
   selected, can define them afresh -- confirm against the
   self-inclusion site.  */

/* Compiled-pattern number encoding.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debugging output.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure stack.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Pattern buffer construction.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

/* Matcher helpers.  The second name used to be spelled
   MATCHING_IN_FRST_STRING, which names no macro, so that #undef had no
   effect.  */
# undef POINTER_TO_OFFSET
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-width selection.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

# define DEFINED_ONCE
8212

source code of libiberty/regex.c