1/* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "config.h"
19
20#include <string.h>
21
22#include "gpattern.h"
23
24#include "gmacros.h"
25#include "gmessages.h"
26#include "gmem.h"
27#include "gunicode.h"
28#include "gutils.h"
29
30/**
31 * SECTION:patterns
32 * @title: Glob-style pattern matching
33 * @short_description: matches strings against patterns containing '*'
34 * (wildcard) and '?' (joker)
35 *
36 * The g_pattern_match* functions match a string
37 * against a pattern containing '*' and '?' wildcards with similar
38 * semantics as the standard glob() function: '*' matches an arbitrary,
39 * possibly empty, string, '?' matches an arbitrary character.
40 *
41 * Note that in contrast to glob(), the '/' character can be matched by
42 * the wildcards, there are no '[...]' character ranges and '*' and '?'
43 * can not be escaped to include them literally in a pattern.
44 *
45 * When multiple strings must be matched against the same pattern, it
46 * is better to compile the pattern to a #GPatternSpec using
47 * g_pattern_spec_new() and use g_pattern_match_string() instead of
48 * g_pattern_match_simple(). This avoids the overhead of repeated
49 * pattern compilation.
50 **/
51
52/**
53 * GPatternSpec:
54 *
55 * A GPatternSpec struct is the 'compiled' form of a pattern. This
56 * structure is opaque and its fields cannot be accessed directly.
57 */
58
59/* keep enum and structure of gpattern.c and patterntest.c in sync */
60typedef enum
61{
62 G_MATCH_ALL, /* "*A?A*" */
63 G_MATCH_ALL_TAIL, /* "*A?AA" */
64 G_MATCH_HEAD, /* "AAAA*" */
65 G_MATCH_TAIL, /* "*AAAA" */
66 G_MATCH_EXACT, /* "AAAAA" */
67 G_MATCH_LAST
68} GMatchType;
69
70struct _GPatternSpec
71{
72 GMatchType match_type;
73 guint pattern_length;
74 guint min_length;
75 guint max_length;
76 gchar *pattern;
77};
78
79
80/* --- functions --- */
81static inline gboolean
82g_pattern_ph_match (const gchar *match_pattern,
83 const gchar *match_string,
84 gboolean *wildcard_reached_p)
85{
86 const gchar *pattern, *string;
87 gchar ch;
88
89 pattern = match_pattern;
90 string = match_string;
91
92 ch = *pattern;
93 pattern++;
94 while (ch)
95 {
96 switch (ch)
97 {
98 case '?':
99 if (!*string)
100 return FALSE;
101 string = g_utf8_next_char (string);
102 break;
103
104 case '*':
105 *wildcard_reached_p = TRUE;
106 do
107 {
108 ch = *pattern;
109 pattern++;
110 if (ch == '?')
111 {
112 if (!*string)
113 return FALSE;
114 string = g_utf8_next_char (string);
115 }
116 }
117 while (ch == '*' || ch == '?');
118 if (!ch)
119 return TRUE;
120 do
121 {
122 gboolean next_wildcard_reached = FALSE;
123 while (ch != *string)
124 {
125 if (!*string)
126 return FALSE;
127 string = g_utf8_next_char (string);
128 }
129 string++;
130 if (g_pattern_ph_match (match_pattern: pattern, match_string: string, wildcard_reached_p: &next_wildcard_reached))
131 return TRUE;
132 if (next_wildcard_reached)
133 /* the forthcoming pattern substring up to the next wildcard has
134 * been matched, but a mismatch occurred for the rest of the
135 * pattern, following the next wildcard.
136 * there's no need to advance the current match position any
137 * further if the rest pattern will not match.
138 */
139 return FALSE;
140 }
141 while (*string);
142 break;
143
144 default:
145 if (ch == *string)
146 string++;
147 else
148 return FALSE;
149 break;
150 }
151
152 ch = *pattern;
153 pattern++;
154 }
155
156 return *string == 0;
157}
158
159/**
160 * g_pattern_match:
161 * @pspec: a #GPatternSpec
162 * @string_length: the length of @string (in bytes, i.e. strlen(),
163 * not g_utf8_strlen())
164 * @string: the UTF-8 encoded string to match
165 * @string_reversed: (nullable): the reverse of @string or %NULL
166 *
167 * Matches a string against a compiled pattern. Passing the correct
168 * length of the string given is mandatory. The reversed string can be
169 * omitted by passing %NULL, this is more efficient if the reversed
170 * version of the string to be matched is not at hand, as
171 * g_pattern_match() will only construct it if the compiled pattern
172 * requires reverse matches.
173 *
174 * Note that, if the user code will (possibly) match a string against a
175 * multitude of patterns containing wildcards, chances are high that
176 * some patterns will require a reversed string. In this case, it's
177 * more efficient to provide the reversed string to avoid multiple
178 * constructions thereof in the various calls to g_pattern_match().
179 *
180 * Note also that the reverse of a UTF-8 encoded string can in general
181 * not be obtained by g_strreverse(). This works only if the string
182 * does not contain any multibyte characters. GLib offers the
183 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
184 *
185 * Returns: %TRUE if @string matches @pspec
186 **/
187gboolean
188g_pattern_match (GPatternSpec *pspec,
189 guint string_length,
190 const gchar *string,
191 const gchar *string_reversed)
192{
193 g_return_val_if_fail (pspec != NULL, FALSE);
194 g_return_val_if_fail (string != NULL, FALSE);
195
196 if (string_length < pspec->min_length ||
197 string_length > pspec->max_length)
198 return FALSE;
199
200 switch (pspec->match_type)
201 {
202 gboolean dummy;
203 case G_MATCH_ALL:
204 return g_pattern_ph_match (match_pattern: pspec->pattern, match_string: string, wildcard_reached_p: &dummy);
205 case G_MATCH_ALL_TAIL:
206 if (string_reversed)
207 return g_pattern_ph_match (match_pattern: pspec->pattern, match_string: string_reversed, wildcard_reached_p: &dummy);
208 else
209 {
210 gboolean result;
211 gchar *tmp;
212 tmp = g_utf8_strreverse (str: string, len: string_length);
213 result = g_pattern_ph_match (match_pattern: pspec->pattern, match_string: tmp, wildcard_reached_p: &dummy);
214 g_free (mem: tmp);
215 return result;
216 }
217 case G_MATCH_HEAD:
218 if (pspec->pattern_length == string_length)
219 return strcmp (s1: pspec->pattern, s2: string) == 0;
220 else if (pspec->pattern_length)
221 return strncmp (s1: pspec->pattern, s2: string, n: pspec->pattern_length) == 0;
222 else
223 return TRUE;
224 case G_MATCH_TAIL:
225 if (pspec->pattern_length)
226 return strcmp (s1: pspec->pattern, s2: string + (string_length - pspec->pattern_length)) == 0;
227 else
228 return TRUE;
229 case G_MATCH_EXACT:
230 if (pspec->pattern_length != string_length)
231 return FALSE;
232 else
233 return strcmp (s1: pspec->pattern, s2: string) == 0;
234 default:
235 g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
236 return FALSE;
237 }
238}
239
240/**
241 * g_pattern_spec_new:
242 * @pattern: a zero-terminated UTF-8 encoded string
243 *
244 * Compiles a pattern to a #GPatternSpec.
245 *
246 * Returns: a newly-allocated #GPatternSpec
247 **/
248GPatternSpec*
249g_pattern_spec_new (const gchar *pattern)
250{
251 GPatternSpec *pspec;
252 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
253 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
254 gboolean follows_wildcard = FALSE;
255 guint pending_jokers = 0;
256 const gchar *s;
257 gchar *d;
258 guint i;
259
260 g_return_val_if_fail (pattern != NULL, NULL);
261
262 /* canonicalize pattern and collect necessary stats */
263 pspec = g_new (GPatternSpec, 1);
264 pspec->pattern_length = strlen (s: pattern);
265 pspec->min_length = 0;
266 pspec->max_length = 0;
267 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
268 d = pspec->pattern;
269 for (i = 0, s = pattern; *s != 0; s++)
270 {
271 switch (*s)
272 {
273 case '*':
274 if (follows_wildcard) /* compress multiple wildcards */
275 {
276 pspec->pattern_length--;
277 continue;
278 }
279 follows_wildcard = TRUE;
280 if (hw_pos < 0)
281 hw_pos = i;
282 tw_pos = i;
283 break;
284 case '?':
285 pending_jokers++;
286 pspec->min_length++;
287 pspec->max_length += 4; /* maximum UTF-8 character length */
288 continue;
289 default:
290 for (; pending_jokers; pending_jokers--, i++) {
291 *d++ = '?';
292 if (hj_pos < 0)
293 hj_pos = i;
294 tj_pos = i;
295 }
296 follows_wildcard = FALSE;
297 pspec->min_length++;
298 pspec->max_length++;
299 break;
300 }
301 *d++ = *s;
302 i++;
303 }
304 for (; pending_jokers; pending_jokers--) {
305 *d++ = '?';
306 if (hj_pos < 0)
307 hj_pos = i;
308 tj_pos = i;
309 }
310 *d++ = 0;
311 seen_joker = hj_pos >= 0;
312 seen_wildcard = hw_pos >= 0;
313 more_wildcards = seen_wildcard && hw_pos != tw_pos;
314 if (seen_wildcard)
315 pspec->max_length = G_MAXUINT;
316
317 /* special case sole head/tail wildcard or exact matches */
318 if (!seen_joker && !more_wildcards)
319 {
320 if (pspec->pattern[0] == '*')
321 {
322 pspec->match_type = G_MATCH_TAIL;
323 memmove (dest: pspec->pattern, src: pspec->pattern + 1, n: --pspec->pattern_length);
324 pspec->pattern[pspec->pattern_length] = 0;
325 return pspec;
326 }
327 if (pspec->pattern_length > 0 &&
328 pspec->pattern[pspec->pattern_length - 1] == '*')
329 {
330 pspec->match_type = G_MATCH_HEAD;
331 pspec->pattern[--pspec->pattern_length] = 0;
332 return pspec;
333 }
334 if (!seen_wildcard)
335 {
336 pspec->match_type = G_MATCH_EXACT;
337 return pspec;
338 }
339 }
340
341 /* now just need to distinguish between head or tail match start */
342 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
343 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
344 if (seen_wildcard)
345 pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
346 else /* seen_joker */
347 pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
348 if (pspec->match_type == G_MATCH_ALL_TAIL) {
349 gchar *tmp = pspec->pattern;
350 pspec->pattern = g_utf8_strreverse (str: pspec->pattern, len: pspec->pattern_length);
351 g_free (mem: tmp);
352 }
353 return pspec;
354}
355
356/**
357 * g_pattern_spec_free:
358 * @pspec: a #GPatternSpec
359 *
360 * Frees the memory allocated for the #GPatternSpec.
361 **/
362void
363g_pattern_spec_free (GPatternSpec *pspec)
364{
365 g_return_if_fail (pspec != NULL);
366
367 g_free (mem: pspec->pattern);
368 g_free (mem: pspec);
369}
370
371/**
372 * g_pattern_spec_equal:
373 * @pspec1: a #GPatternSpec
374 * @pspec2: another #GPatternSpec
375 *
376 * Compares two compiled pattern specs and returns whether they will
377 * match the same set of strings.
378 *
379 * Returns: Whether the compiled patterns are equal
380 **/
381gboolean
382g_pattern_spec_equal (GPatternSpec *pspec1,
383 GPatternSpec *pspec2)
384{
385 g_return_val_if_fail (pspec1 != NULL, FALSE);
386 g_return_val_if_fail (pspec2 != NULL, FALSE);
387
388 return (pspec1->pattern_length == pspec2->pattern_length &&
389 pspec1->match_type == pspec2->match_type &&
390 strcmp (s1: pspec1->pattern, s2: pspec2->pattern) == 0);
391}
392
393/**
394 * g_pattern_match_string:
395 * @pspec: a #GPatternSpec
396 * @string: the UTF-8 encoded string to match
397 *
398 * Matches a string against a compiled pattern. If the string is to be
399 * matched against more than one pattern, consider using
400 * g_pattern_match() instead while supplying the reversed string.
401 *
402 * Returns: %TRUE if @string matches @pspec
403 **/
404gboolean
405g_pattern_match_string (GPatternSpec *pspec,
406 const gchar *string)
407{
408 g_return_val_if_fail (pspec != NULL, FALSE);
409 g_return_val_if_fail (string != NULL, FALSE);
410
411 return g_pattern_match (pspec, string_length: strlen (s: string), string, NULL);
412}
413
414/**
415 * g_pattern_match_simple:
416 * @pattern: the UTF-8 encoded pattern
417 * @string: the UTF-8 encoded string to match
418 *
419 * Matches a string against a pattern given as a string. If this
420 * function is to be called in a loop, it's more efficient to compile
421 * the pattern once with g_pattern_spec_new() and call
422 * g_pattern_match_string() repeatedly.
423 *
424 * Returns: %TRUE if @string matches @pspec
425 **/
426gboolean
427g_pattern_match_simple (const gchar *pattern,
428 const gchar *string)
429{
430 GPatternSpec *pspec;
431 gboolean ergo;
432
433 g_return_val_if_fail (pattern != NULL, FALSE);
434 g_return_val_if_fail (string != NULL, FALSE);
435
436 pspec = g_pattern_spec_new (pattern);
437 ergo = g_pattern_match (pspec, string_length: strlen (s: string), string, NULL);
438 g_pattern_spec_free (pspec);
439
440 return ergo;
441}
442

source code of gtk/subprojects/glib/glib/gpattern.c