1 | /* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. |
2 | * |
3 | * This library is free software; you can redistribute it and/or |
4 | * modify it under the terms of the GNU Lesser General Public |
5 | * License as published by the Free Software Foundation; either |
6 | * version 2 of the License, or (at your option) any later version. |
7 | * |
8 | * This library is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * Lesser General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library. If not, see <http://www.gnu.org/licenses/>. |
15 | */ |
16 | |
17 | /* |
18 | * Modified by the GTK+ Team and others 1997-2000. See the AUTHORS |
19 | * file for a list of people on the GTK+ Team. See the ChangeLog |
20 | * files for a list of changes. These files are distributed with |
21 | * GTK+ at ftp://ftp.gtk.org/pub/gtk/. |
22 | */ |
23 | |
24 | /* |
25 | * Stripped down, converted to UTF-8 and test cases added |
26 | * |
27 | * Owen Taylor, 13 December 2002; |
28 | */ |
29 | |
30 | #include "config.h" |
31 | |
32 | #include "gtkprivate.h" |
33 | |
34 | #include <string.h> |
35 | |
36 | #include <glib.h> |
37 | |
38 | static gunichar |
39 | get_char (const char **str, |
40 | gboolean casefold) |
41 | { |
42 | gunichar c = g_utf8_get_char (p: *str); |
43 | *str = g_utf8_next_char (*str); |
44 | |
45 | if (casefold) |
46 | c = g_unichar_tolower (c); |
47 | |
48 | return c; |
49 | } |
50 | |
/* DO_ESCAPE is nonzero when a backslash in a pattern escapes the character
 * that follows it.  It is disabled when G_OS_WIN32 or G_WITH_CYGWIN is
 * defined. */
#if !defined(G_OS_WIN32) && !defined(G_WITH_CYGWIN)
#define DO_ESCAPE 1
#else
#define DO_ESCAPE 0
#endif
56 | |
57 | static gunichar |
58 | get_unescaped_char (const char **str, |
59 | gboolean *was_escaped, |
60 | gboolean casefold) |
61 | { |
62 | gunichar c = get_char (str, casefold); |
63 | |
64 | *was_escaped = DO_ESCAPE && c == '\\'; |
65 | if (*was_escaped) |
66 | c = get_char (str, casefold); |
67 | |
68 | return c; |
69 | } |
70 | |
71 | /* Match STRING against the filename pattern PATTERN, returning zero if |
72 | it matches, nonzero if not. */ |
73 | |
74 | static gboolean |
75 | gtk_fnmatch_intern (const char *pattern, |
76 | const char *string, |
77 | gboolean component_start, |
78 | gboolean no_leading_period, |
79 | gboolean casefold) |
80 | { |
81 | const char *p = pattern, *n = string; |
82 | |
83 | while (*p) |
84 | { |
85 | const char *last_n = n; |
86 | |
87 | gunichar c = get_char (str: &p, casefold); |
88 | gunichar nc = get_char (str: &n, casefold); |
89 | |
90 | switch (c) |
91 | { |
92 | case '?': |
93 | if (nc == '\0') |
94 | return FALSE; |
95 | else if (nc == G_DIR_SEPARATOR) |
96 | return FALSE; |
97 | else if (nc == '.' && component_start && no_leading_period) |
98 | return FALSE; |
99 | break; |
100 | case '\\': |
101 | if (DO_ESCAPE) |
102 | c = get_char (str: &p, casefold); |
103 | if (nc != c) |
104 | return FALSE; |
105 | break; |
106 | case '*': |
107 | if (nc == '.' && component_start && no_leading_period) |
108 | return FALSE; |
109 | |
110 | { |
111 | const char *last_p; |
112 | |
113 | for (last_p = p, c = get_char (str: &p, casefold); |
114 | c == '?' || c == '*'; |
115 | last_p = p, c = get_char (str: &p, casefold)) |
116 | { |
117 | if (c == '?') |
118 | { |
119 | if (nc == '\0') |
120 | return FALSE; |
121 | else if (nc == G_DIR_SEPARATOR) |
122 | return FALSE; |
123 | else |
124 | { |
125 | last_n = n; nc = get_char (str: &n, casefold); |
126 | } |
127 | } |
128 | } |
129 | |
130 | /* If the pattern ends with wildcards, we have a |
131 | * guaranteed match unless there is a dir separator |
132 | * in the remainder of the string. |
133 | */ |
134 | if (c == '\0') |
135 | { |
136 | if (strchr (s: last_n, G_DIR_SEPARATOR) != NULL) |
137 | return FALSE; |
138 | else |
139 | return TRUE; |
140 | } |
141 | |
142 | if (DO_ESCAPE && c == '\\') |
143 | c = get_char (str: &p, casefold); |
144 | |
145 | for (p = last_p; nc != '\0';) |
146 | { |
147 | if ((c == '[' || nc == c) && |
148 | gtk_fnmatch_intern (pattern: p, string: last_n, component_start, no_leading_period, casefold)) |
149 | return TRUE; |
150 | |
151 | component_start = (nc == G_DIR_SEPARATOR); |
152 | last_n = n; |
153 | nc = get_char (str: &n, casefold); |
154 | } |
155 | |
156 | return FALSE; |
157 | } |
158 | |
159 | case '[': |
160 | { |
161 | /* Nonzero if the sense of the character class is inverted. */ |
162 | gboolean not; |
163 | gboolean was_escaped; |
164 | |
165 | if (nc == '\0' || nc == G_DIR_SEPARATOR) |
166 | return FALSE; |
167 | |
168 | if (nc == '.' && component_start && no_leading_period) |
169 | return FALSE; |
170 | |
171 | not = (*p == '!' || *p == '^'); |
172 | if (not) |
173 | ++p; |
174 | |
175 | c = get_unescaped_char (str: &p, was_escaped: &was_escaped, casefold); |
176 | for (;;) |
177 | { |
178 | register gunichar cstart = c, cend = c; |
179 | if (c == '\0') |
180 | /* [ (unterminated) loses. */ |
181 | return FALSE; |
182 | |
183 | c = get_unescaped_char (str: &p, was_escaped: &was_escaped, casefold); |
184 | |
185 | if (!was_escaped && c == '-' && *p != ']') |
186 | { |
187 | cend = get_unescaped_char (str: &p, was_escaped: &was_escaped, casefold); |
188 | if (cend == '\0') |
189 | return FALSE; |
190 | |
191 | c = get_char (str: &p, casefold); |
192 | } |
193 | |
194 | if (nc >= cstart && nc <= cend) |
195 | goto matched; |
196 | |
197 | if (!was_escaped && c == ']') |
198 | break; |
199 | } |
200 | if (!not) |
201 | return FALSE; |
202 | break; |
203 | |
204 | matched:; |
205 | /* Skip the rest of the [...] that already matched. */ |
206 | /* XXX 1003.2d11 is unclear if was_escaped is right. */ |
207 | while (was_escaped || c != ']') |
208 | { |
209 | if (c == '\0') |
210 | /* [... (unterminated) loses. */ |
211 | return FALSE; |
212 | |
213 | c = get_unescaped_char (str: &p, was_escaped: &was_escaped, casefold); |
214 | } |
215 | if (not) |
216 | return FALSE; |
217 | } |
218 | break; |
219 | |
220 | default: |
221 | if (c != nc) |
222 | return FALSE; |
223 | } |
224 | |
225 | component_start = (nc == G_DIR_SEPARATOR); |
226 | } |
227 | |
228 | if (*n == '\0') |
229 | return TRUE; |
230 | |
231 | return FALSE; |
232 | } |
233 | |
234 | /* Match STRING against the filename pattern PATTERN, returning zero if |
235 | * it matches, nonzero if not. |
236 | * |
237 | * GTK+ used to use a old version of GNU fnmatch() that was buggy |
238 | * in various ways and didn’t handle UTF-8. The following is |
239 | * converted to UTF-8. To simplify the process of making it |
240 | * correct, this is special-cased to the combinations of flags |
241 | * that gtkfilesel.c uses. |
242 | * |
243 | * FNM_FILE_NAME - always set |
244 | * FNM_LEADING_DIR - never set |
245 | * FNM_NOESCAPE - set only on windows |
246 | * FNM_CASEFOLD - set only on windows |
247 | */ |
248 | gboolean |
249 | _gtk_fnmatch (const char *pattern, |
250 | const char *string, |
251 | gboolean no_leading_period, |
252 | gboolean casefold) |
253 | { |
254 | return gtk_fnmatch_intern (pattern, string, TRUE, no_leading_period, casefold); |
255 | } |
256 | |
257 | /* Turn a glob pattern into a case-insensitive one, by replacing |
258 | * alphabetic characters by [xX] ranges. |
259 | */ |
260 | char * |
261 | _gtk_make_ci_glob_pattern (const char *pattern) |
262 | { |
263 | GString *s; |
264 | gboolean in_range = FALSE; |
265 | |
266 | s = g_string_new (init: "" ); |
267 | for (const char *p = pattern; *p; p = g_utf8_next_char (p)) |
268 | { |
269 | gunichar c = g_utf8_get_char (p); |
270 | if (in_range) |
271 | { |
272 | g_string_append_unichar (string: s, wc: c); |
273 | if (c == ']') |
274 | in_range = FALSE; |
275 | continue; |
276 | } |
277 | |
278 | #if DO_ESCAPE |
279 | if (c == '\\') |
280 | { |
281 | g_string_append (string: s, val: "\\" ); |
282 | p = g_utf8_next_char (p); |
283 | if (*p == '\0') |
284 | break; |
285 | |
286 | c = g_utf8_get_char (p); |
287 | g_string_append_unichar (string: s, wc: c); |
288 | continue; |
289 | } |
290 | #endif |
291 | |
292 | if (c == '[') |
293 | { |
294 | g_string_append (string: s, val: "[" ); |
295 | p = g_utf8_next_char (p); |
296 | if (*p == '\0') |
297 | break; |
298 | |
299 | c = g_utf8_get_char (p); |
300 | g_string_append_unichar (string: s, wc: c); |
301 | |
302 | in_range = TRUE; |
303 | continue; |
304 | } |
305 | else if (g_unichar_isalpha (c)) |
306 | { |
307 | g_string_append (string: s, val: "[" ); |
308 | g_string_append_unichar (string: s, wc: g_unichar_tolower (c)); |
309 | g_string_append_unichar (string: s, wc: g_unichar_toupper (c)); |
310 | g_string_append (string: s, val: "]" ); |
311 | } |
312 | else |
313 | { |
314 | g_string_append_unichar (string: s, wc: c); |
315 | } |
316 | } |
317 | |
318 | return g_string_free (string: s, FALSE); |
319 | } |
320 | |