tst-rxspencer.c source code [glibc/posix/tst-rxspencer.c]

/* Regular expression tests.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19	#include <sys/types.h>
20	#include <mcheck.h>
21	#include <regex.h>
22	#include <stdio.h>
23	#include <stdlib.h>
24	#include <string.h>
25	#include <locale.h>
26	#include <getopt.h>
27
/* Expand the test file's placeholder letters in STR, in place:
   'N' becomes a newline, 'T' a tab, 'S' a space and 'Z' a NUL byte.

   Scanning resumes one byte after each replacement, so placeholders
   that follow a 'Z' (now an embedded NUL) are still expanded; the
   string's original terminator is what ends the scan.  */
static void
replace_special_chars (char *str)
{
  for (; (str = strpbrk (str, "NTSZ")) != NULL; ++str)
    switch (*str)
      {
      case 'N': *str = '\n'; break;
      case 'T': *str = '\t'; break;
      case 'S': *str = ' '; break;
      case 'Z': *str = '\0'; break;
      }
}
40
/* Rewrite, in place, the word-boundary syntax used by the test file
   into the form used here: "[[:<:]]" becomes "\<" and "[[:>:]]"
   becomes "\>".  Each rewrite shortens the string by five bytes.  */
static void
glibc_re_syntax (char *str)
{
  /* END points one past the terminating NUL so that the memmove below
     carries the terminator along with the tail.  */
  char *p, *end = strchr (str, '\0') + 1;

  /* Replace [[:<:]] with \< and [[:>:]] with \>.  */
  for (p = str; (p = strstr (p, "[[:")) != NULL; )
    if ((p[3] == '<' || p[3] == '>') && strncmp (p + 4, ":]]", 3) == 0)
      {
	p[0] = '\\';
	p[1] = p[3];
	/* Close the gap left by the seven-byte original.  */
	memmove (p + 2, p + 7, end - p - 7);
	end -= 5;
	p += 2;
      }
    else
      p += 3;
}
59
/* Store at DST the two-byte UTF-8 sequence that stands in for the
   ASCII letter C and return the address just past what was written.
   Only a-d and A-D have a replacement; for any other C nothing is
   written and DST is returned unchanged.  */
static char *
mb_replace (char *dst, const char c)
{
  switch (c)
    {
    /* Replace a with \'a (U+00E1) and A with \'A (U+00C1).  */
    case 'a':
      *dst++ = '\xc3';
      *dst++ = '\xa1';
      break;
    case 'A':
      *dst++ = '\xc3';
      *dst++ = '\x81';
      break;
    /* Replace b with \v{c} (U+010D) and B with \v{C} (U+010C).  */
    case 'b':
      *dst++ = '\xc4';
      *dst++ = '\x8d';
      break;
    case 'B':
      *dst++ = '\xc4';
      *dst++ = '\x8c';
      break;
    /* Replace c with \v{d} (U+010F) and C with \v{D} (U+010E).  */
    case 'c':
      *dst++ = '\xc4';
      *dst++ = '\x8f';
      break;
    case 'C':
      *dst++ = '\xc4';
      *dst++ = '\x8e';
      break;
    /* Replace d with \'e (U+00E9) and D with \'E (U+00C9).  */
    case 'd':
      *dst++ = '\xc3';
      *dst++ = '\xa9';
      break;
    case 'D':
      *dst++ = '\xc3';
      *dst++ = '\x89';
      break;
    default:
      /* No replacement defined: write nothing.  */
      break;
    }
  return dst;
}
104
/* Return a newly allocated copy of STR in which every character that
   occurs in LETTERS has been replaced by its two-byte sequence from
   mb_replace.  Returns NULL if STR is NULL or if allocation fails.
   The caller owns the result and must free it.  */
static char *
mb_frob_string (const char *str, const char *letters)
{
  char *ret, *dst;
  const char *src;

  if (str == NULL)
    return NULL;

  /* Worst case every input byte becomes two bytes.  */
  ret = malloc (2 * strlen (str) + 1);
  if (ret == NULL)
    return NULL;

  for (src = str, dst = ret; *src; ++src)
    if (strchr (letters, *src))
      dst = mb_replace (dst, *src);
    else
      *dst++ = *src;
  *dst = '\0';
  return ret;
}
126
/* Like mb_frob_string, but don't replace anything between
   [: and :], [. and .] or [= and =] or characters escaped
   with a backslash.

   Returns a newly allocated string the caller must free, or NULL if
   STR is NULL or allocation fails.  */

static char *
mb_frob_pattern (const char *str, const char *letters)
{
  char *ret, *dst;
  const char *src;
  int in_class = 0, escaped = 0;

  if (str == NULL)
    return NULL;

  /* Worst case every input byte becomes two bytes.  */
  ret = malloc (2 * strlen (str) + 1);
  if (ret == NULL)
    return NULL;

  for (src = str, dst = ret; *src; ++src)
    if (*src == '\\')
      {
	/* Toggle rather than set, so that "\\\\" (an escaped backslash)
	   leaves the following character unescaped.  */
	escaped ^= 1;
	*dst++ = *src;
      }
    else if (escaped)
      {
	/* The character after a backslash is copied verbatim.  */
	escaped = 0;
	*dst++ = *src;
	continue;
      }
    else if (!in_class && strchr (letters, *src))
      dst = mb_replace (dst, *src);
    else
      {
	/* Track entry into and exit from [: :], [. .] and [= =].  */
	if (!in_class && *src == '[' && strchr (":.=", src[1]))
	  in_class = 1;
	else if (in_class && *src == ']' && strchr (":.=", src[-1]))
	  in_class = 0;
	*dst++ = *src;
      }
  *dst = '\0';
  return ret;
}
170
/* Compare submatch RM[IDX], obtained by matching against STRING, with
   the expectation MATCH.  FAIL is the prefix for diagnostics.

   MATCH is interpreted as follows:
     "-"     the submatch must not have participated (both offsets -1);
     "@TEXT" the submatch must be empty and STRING at that offset must
	     start with TEXT; a bare "@" requires the offset to be at
	     the end of STRING;
     other   the submatch text must equal MATCH exactly.

   Returns 0 if the expectation holds; otherwise prints a message to
   stdout and returns 1.  */
static int
check_match (regmatch_t *rm, int idx, const char *string,
	     const char *match, const char *fail)
{
  if (match[0] == '-' && match[1] == '\0')
    {
      if (rm[idx].rm_so == -1 && rm[idx].rm_eo == -1)
	return 0;
      printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
      return 1;
    }

  if (rm[idx].rm_so == -1 || rm[idx].rm_eo == -1)
    {
      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
      return 1;
    }

  if (match[0] == '@')
    {
      if (rm[idx].rm_so != rm[idx].rm_eo)
	{
	  printf ("%s rm[%d] not empty\n", fail, idx);
	  return 1;
	}

      /* With nothing after '@', compare one byte so that the check
	 only succeeds at the terminating NUL of STRING.  */
      size_t want = strlen (match + 1);
      if (strncmp (string + rm[idx].rm_so, match + 1, want != 0 ? want : 1))
	{
	  printf ("%s rm[%d] not matching %s\n", fail, idx, match);
	  return 1;
	}
      return 0;
    }

  size_t got = (size_t) (rm[idx].rm_eo - rm[idx].rm_so);
  if (got != strlen (match)
      || strncmp (string + rm[idx].rm_so, match, got))
    {
      printf ("%s rm[%d] not matching %s\n", fail, idx, match);
      return 1;
    }

  return 0;
}
215
/* Run one test case: compile PATTERN with CFLAGS and match it against
   STRING with EFLAGS.

   EFLAGS == -1 marks a case where compilation is expected to fail; in
   that case STRING holds the expected error name without its "REG_"
   prefix.  Otherwise EXPECT is the expected text of the whole match
   (NULL if no match is expected) and MATCHES, if not NULL, is a
   comma-separated list of expectations for submatches 1..9, in the
   notation understood by check_match.  MATCHES is modified temporarily
   while it is being split, and restored afterwards.

   FAIL is the prefix for diagnostics printed to stdout.
   Returns 0 if the case passes and 1 if it fails.  */
static int
test (const char *pattern, int cflags, const char *string, int eflags,
      char *expect, char *matches, const char *fail)
{
  regex_t re;
  regmatch_t rm[10];
  int n, ret = 0;

  n = regcomp (&re, pattern, cflags);
  if (n != 0)
    {
      char buf[500];
      if (eflags == -1)
	{
	  /* Map regcomp error codes to their names, sans "REG_".  */
#define C(x) { REG_##x, #x }
	  static const struct { reg_errcode_t code; const char *name; }
	    codes[] =
	      { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
		C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
		C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
		C(ESPACE), C(BADRPT) };
#undef C

	  for (size_t i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i)
	    if (n == (int) codes[i].code)
	      {
		if (strcmp (string, codes[i].name))
		  {
		    printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
			    fail, codes[i].name, string);
		    return 1;
		  }
		return 0;
	      }

	  printf ("%s regcomp return value REG_%d\n", fail, n);
	  return 1;
	}

      regerror (n, &re, buf, sizeof (buf));
      printf ("%s regcomp failed: %s\n", fail, buf);
      return 1;
    }

  if (eflags == -1)
    {
      regfree (&re);

      /* The test case file assumes something only guaranteed by the
	 rxspencer regex implementation.  Namely that for empty
	 expressions regcomp() return REG_EMPTY.  This is not the case
	 for us and so we ignore this error.  */
      if (strcmp (string, "EMPTY") == 0)
	return 0;

      printf ("%s regcomp unexpectedly succeeded\n", fail);
      return 1;
    }

  if (regexec (&re, string, 10, rm, eflags))
    {
      regfree (&re);
      if (expect == NULL)
	return 0;
      printf ("%s regexec failed\n", fail);
      return 1;
    }

  regfree (&re);

  if (expect == NULL)
    {
      printf ("%s regexec unexpectedly succeeded\n", fail);
      return 1;
    }

  /* With REG_NOSUB there are no offsets to verify.  */
  if (cflags & REG_NOSUB)
    return 0;

  ret = check_match (rm, 0, string, expect, fail);
  if (matches == NULL)
    return ret;

  /* Walk the comma-separated list; once it runs out, the remaining
     submatches are expected not to have participated ("-").  */
  for (n = 1; ret == 0 && n < 10; ++n)
    {
      char *p = NULL;

      if (matches)
	{
	  p = strchr (matches, ',');
	  if (p != NULL)
	    *p = '\0';
	}
      ret = check_match (rm, n, string, matches != NULL ? matches : "-",
			 fail);
      if (p)
	{
	  /* Restore the separator so the caller's string is intact.  */
	  *p = ',';
	  matches = p + 1;
	}
      else
	matches = NULL;
    }

  return ret;
}
319
/* Run one test case after replacing the characters listed in LETTERS
   by multibyte sequences in PATTERN, STRING, EXPECT and MATCHES.
   STRING is passed through unchanged when EFLAGS == -1, since test
   then treats it as an error name rather than as text to match.

   FAIL is the prefix for diagnostics.  Returns the result of test, or
   1 if one of the converted strings could not be allocated.  */
static int
mb_test (const char *pattern, int cflags, const char *string, int eflags,
	 char *expect, const char *matches, const char *letters,
	 const char *fail)
{
  char *pattern_mb = mb_frob_pattern (pattern, letters);
  const char *string_mb
    = eflags == -1 ? string : mb_frob_string (string, letters);
  char *expect_mb = mb_frob_string (expect, letters);
  char *matches_mb = mb_frob_string (matches, letters);
  int ret = 0;

  /* EXPECT and MATCHES may legitimately be NULL, in which case their
     converted forms are NULL too; only a NULL result for a non-NULL
     input is an error.  */
  if (!pattern_mb || !string_mb
      || (expect && !expect_mb) || (matches && !matches_mb))
    {
      printf ("%s %m", fail);
      ret = 1;
    }
  else
    ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
		matches_mb, fail);

  free (matches_mb);
  free (expect_mb);
  /* STRING_MB aliases STRING when no conversion was done.  */
  if (string_mb != string)
    free ((char *) string_mb);
  free (pattern_mb);
  return ret;
}
349
/* Run the multibyte variants of one test case.  Every non-empty
   subset of the letter pairs a/A, b/B, c/C and d/D is tried (I is the
   subset's bit mask); a subset is skipped when one of its pairs occurs
   neither in PATTERN nor in STRING.

   Returns 0 if all variants pass, nonzero otherwise.  */
static int
mb_tests (const char *pattern, int cflags, const char *string, int eflags,
	  char *expect, const char *matches)
{
  int ret = 0;
  int i;
  /* LETTERS holds at most four pairs plus NUL; FAIL holds "UTF-8 ",
     up to eight letters, " FAIL" and NUL.  */
  char letters[9], fail[20];

  /* The tests aren't supposed to work with xdigit, since a-dA-D are
     hex digits while \'a \'A \v{c}\v{C}\v{d}\v{D}\'e \'E are not.  */
  if (strstr (pattern, "[:xdigit:]"))
    return 0;

  /* XXX: regex ATM handles only single byte equivalence classes.  */
  if (strstr (pattern, "[[=b=]]"))
    return 0;

  for (i = 1; i < 16; ++i)
    {
      char *p = letters;
      if (i & 1)
	{
	  if (!strchr (pattern, 'a') && !strchr (string, 'a')
	      && !strchr (pattern, 'A') && !strchr (string, 'A'))
	    continue;
	  *p++ = 'a', *p++ = 'A';
	}
      if (i & 2)
	{
	  if (!strchr (pattern, 'b') && !strchr (string, 'b')
	      && !strchr (pattern, 'B') && !strchr (string, 'B'))
	    continue;
	  *p++ = 'b', *p++ = 'B';
	}
      if (i & 4)
	{
	  if (!strchr (pattern, 'c') && !strchr (string, 'c')
	      && !strchr (pattern, 'C') && !strchr (string, 'C'))
	    continue;
	  *p++ = 'c', *p++ = 'C';
	}
      if (i & 8)
	{
	  if (!strchr (pattern, 'd') && !strchr (string, 'd')
	      && !strchr (pattern, 'D') && !strchr (string, 'D'))
	    continue;
	  *p++ = 'd', *p++ = 'D';
	}
      *p++ = '\0';
      /* Bounded write: the buffer is sized exactly for the longest
	 label, so guard against any future change to the format.  */
      snprintf (fail, sizeof (fail), "UTF-8 %s FAIL", letters);
      ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
		      letters, fail);
    }
  return ret;
}
405
/* Test driver.  Usage: PROGRAM [--utf8] TESTFILE

   Each line of TESTFILE that is neither empty nor starts with '#'
   holds tab-separated fields: pattern, flags, string and optionally
   the expected match and the expected submatches.  A pattern or
   string field consisting of two double quotes stands for the empty
   string.  Every case is run in the "C" locale; with --utf8 a passing
   case is also run in the "cs_CZ.UTF-8" locale, both as is and with
   letters replaced by multibyte characters.

   Returns 0 if every case passed, 1 otherwise.  */
int
main (int argc, char **argv)
{
  int ret = 0;
  char *line = NULL;
  size_t line_len = 0;
  ssize_t len;
  FILE *f;
  static int test_utf8 = 0;
  static const struct option options[] =
    {
      {"utf8", no_argument, &test_utf8, 1},
      {NULL, 0, NULL, 0 }
    };

  mtrace ();

  /* The only option just sets a flag, so there is nothing to do per
     option.  */
  while (getopt_long (argc, argv, "", options, NULL) >= 0);

  if (optind + 1 != argc)
    {
      fprintf (stderr, "Missing test filename\n");
      return 1;
    }

  f = fopen (argv[optind], "r");
  if (f == NULL)
    {
      fprintf (stderr, "Couldn't open %s\n", argv[optind]);
      return 1;
    }

  while ((len = getline (&line, &line_len, f)) > 0)
    {
      char *pattern, *flagstr, *string, *expect, *matches, *p;
      int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;

      if (line[len - 1] == '\n')
	line[len - 1] = '\0';

      /* Skip comments and empty lines.  */
      if (*line == '#' || *line == '\0')
	continue;

      puts (line);
      fflush (stdout);

      pattern = strtok (line, "\t");
      if (pattern == NULL)
	continue;

      /* "" denotes the empty pattern: step over both quotes.  */
      if (strcmp (pattern, "\"\"") == 0)
	pattern += 2;

      flagstr = strtok (NULL, "\t");
      if (flagstr == NULL)
	continue;

      string = strtok (NULL, "\t");
      if (string == NULL)
	continue;

      /* "" denotes the empty string: step over both quotes.  */
      if (strcmp (string, "\"\"") == 0)
	string += 2;

      for (p = flagstr; *p; ++p)
	switch (*p)
	  {
	  case '-':
	    break;
	  case 'b':
	    cflags &= ~REG_EXTENDED;
	    break;
	  case '&':
	    try_bre_ere = 1;
	    break;
	  case 'C':
	    /* Compilation is expected to fail; see test.  */
	    eflags = -1;
	    break;
	  case 'i':
	    cflags |= REG_ICASE;
	    break;
	  case 's':
	    cflags |= REG_NOSUB;
	    break;
	  case 'n':
	    cflags |= REG_NEWLINE;
	    break;
	  case '^':
	    eflags |= REG_NOTBOL;
	    break;
	  case '$':
	    eflags |= REG_NOTEOL;
	    break;
	  case 'm':
	  case 'p':
	  case '#':
	    /* Not supported.  */
	    flagstr = NULL;
	    break;
	  default:
	    /* Any other flag character is ignored.  */
	    break;
	  }

      if (flagstr == NULL)
	continue;

      replace_special_chars (pattern);
      glibc_re_syntax (pattern);
      /* With eflags == -1 the string field is an error name, not
	 text, so it is left alone.  */
      if (eflags != -1)
	replace_special_chars (string);

      expect = strtok (NULL, "\t");
      matches = NULL;
      if (expect != NULL)
	{
	  replace_special_chars (expect);
	  matches = strtok (NULL, "\t");
	  if (matches != NULL)
	    replace_special_chars (matches);
	}

      if (setlocale (LC_ALL, "C") == NULL)
	{
	  puts ("setlocale C failed");
	  ret = 1;
	}
      if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
	  || (try_bre_ere
	      && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
		       expect, matches, "FAIL")))
	ret = 1;
      else if (test_utf8)
	{
	  if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
	    {
	      puts ("setlocale cs_CZ.UTF-8 failed");
	      ret = 1;
	    }
	  else if (test (pattern, cflags, string, eflags, expect, matches,
			 "UTF-8 FAIL")
		   || (try_bre_ere
		       && test (pattern, cflags & ~REG_EXTENDED, string,
				eflags, expect, matches, "UTF-8 FAIL")))
	    ret = 1;
	  else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
		   || (try_bre_ere
		       && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
				    eflags, expect, matches)))
	    ret = 1;
	}
    }

  free (line);
  fclose (f);
  return ret;
}
561

source code of glibc/posix/tst-rxspencer.c