1 | /* -*- mode: C; c-file-style: "gnu" -*- */ |
2 | /* xdgmimeglob.c: Private file. Datastructure for storing the globs. |
3 | * |
4 | * More info can be found at http://www.freedesktop.org/standards/ |
5 | * |
6 | * Copyright (C) 2003 Red Hat, Inc. |
7 | * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu> |
8 | * |
9 | * Licensed under the Academic Free License version 2.0 |
10 | * Or under the following terms: |
11 | * |
12 | * This library is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU Lesser General Public |
14 | * License as published by the Free Software Foundation; either |
15 | * version 2.1 of the License, or (at your option) any later version. |
16 | * |
17 | * This library is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * Lesser General Public License for more details. |
21 | * |
22 | * You should have received a copy of the GNU Lesser General Public |
23 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | |
28 | #include "xdgmimeglob.h" |
29 | #include "xdgmimeint.h" |
30 | #include <stdlib.h> |
31 | #include <stdio.h> |
32 | #include <assert.h> |
33 | #include <string.h> |
34 | #include <fnmatch.h> |
35 | |
36 | #ifndef MAX |
37 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) |
38 | #endif |
39 | |
40 | #ifndef FALSE |
41 | #define FALSE (0) |
42 | #endif |
43 | |
44 | #ifndef TRUE |
45 | #define TRUE (!FALSE) |
46 | #endif |
47 | |
48 | typedef struct XdgGlobHashNode XdgGlobHashNode; |
49 | typedef struct XdgGlobList XdgGlobList; |
50 | |
51 | struct XdgGlobHashNode |
52 | { |
53 | xdg_unichar_t character; |
54 | const char *mime_type; |
55 | int weight; |
56 | int case_sensitive; |
57 | XdgGlobHashNode *next; |
58 | XdgGlobHashNode *child; |
59 | }; |
60 | struct XdgGlobList |
61 | { |
62 | const char *data; |
63 | const char *mime_type; |
64 | int weight; |
65 | int case_sensitive; |
66 | XdgGlobList *next; |
67 | }; |
68 | |
69 | struct XdgGlobHash |
70 | { |
71 | XdgGlobList *literal_list; |
72 | XdgGlobHashNode *simple_node; |
73 | XdgGlobList *full_list; |
74 | }; |
75 | |
76 | |
77 | /* XdgGlobList |
78 | */ |
79 | static XdgGlobList * |
80 | _xdg_glob_list_new (void) |
81 | { |
82 | XdgGlobList *new_element; |
83 | |
84 | new_element = calloc (nmemb: 1, size: sizeof (XdgGlobList)); |
85 | |
86 | return new_element; |
87 | } |
88 | |
89 | /* Frees glob_list and all of it's children */ |
90 | static void |
91 | _xdg_glob_list_free (XdgGlobList *glob_list) |
92 | { |
93 | XdgGlobList *ptr, *next; |
94 | |
95 | ptr = glob_list; |
96 | |
97 | while (ptr != NULL) |
98 | { |
99 | next = ptr->next; |
100 | |
101 | if (ptr->data) |
102 | free (ptr: (void *) ptr->data); |
103 | if (ptr->mime_type) |
104 | free (ptr: (void *) ptr->mime_type); |
105 | free (ptr: ptr); |
106 | |
107 | ptr = next; |
108 | } |
109 | } |
110 | |
111 | static XdgGlobList * |
112 | _xdg_glob_list_append (XdgGlobList *glob_list, |
113 | void *data, |
114 | const char *mime_type, |
115 | int weight, |
116 | int case_sensitive) |
117 | { |
118 | XdgGlobList *new_element; |
119 | XdgGlobList *tmp_element; |
120 | |
121 | tmp_element = glob_list; |
122 | while (tmp_element != NULL) |
123 | { |
124 | if (strcmp (s1: tmp_element->data, s2: data) == 0 && |
125 | strcmp (s1: tmp_element->mime_type, s2: mime_type) == 0) |
126 | return glob_list; |
127 | |
128 | tmp_element = tmp_element->next; |
129 | } |
130 | |
131 | new_element = _xdg_glob_list_new (); |
132 | new_element->data = data; |
133 | new_element->mime_type = mime_type; |
134 | new_element->weight = weight; |
135 | new_element->case_sensitive = case_sensitive; |
136 | if (glob_list == NULL) |
137 | return new_element; |
138 | |
139 | tmp_element = glob_list; |
140 | while (tmp_element->next != NULL) |
141 | tmp_element = tmp_element->next; |
142 | |
143 | tmp_element->next = new_element; |
144 | |
145 | return glob_list; |
146 | } |
147 | |
148 | /* XdgGlobHashNode |
149 | */ |
150 | |
151 | static XdgGlobHashNode * |
152 | _xdg_glob_hash_node_new (void) |
153 | { |
154 | XdgGlobHashNode *glob_hash_node; |
155 | |
156 | glob_hash_node = calloc (nmemb: 1, size: sizeof (XdgGlobHashNode)); |
157 | |
158 | return glob_hash_node; |
159 | } |
160 | |
161 | #ifdef NOT_USED_IN_GIO |
162 | |
163 | static void |
164 | _xdg_glob_hash_node_dump (XdgGlobHashNode *glob_hash_node, |
165 | int depth) |
166 | { |
167 | int i; |
168 | for (i = 0; i < depth; i++) |
169 | printf (" " ); |
170 | |
171 | printf ("%c" , (char)glob_hash_node->character); |
172 | if (glob_hash_node->mime_type) |
173 | printf (" - %s %d\n" , glob_hash_node->mime_type, glob_hash_node->weight); |
174 | else |
175 | printf ("\n" ); |
176 | if (glob_hash_node->child) |
177 | _xdg_glob_hash_node_dump (glob_hash_node->child, depth + 1); |
178 | if (glob_hash_node->next) |
179 | _xdg_glob_hash_node_dump (glob_hash_node->next, depth); |
180 | } |
181 | |
182 | #endif |
183 | |
184 | static XdgGlobHashNode * |
185 | _xdg_glob_hash_insert_ucs4 (XdgGlobHashNode *glob_hash_node, |
186 | xdg_unichar_t *text, |
187 | const char *mime_type, |
188 | int weight, |
189 | int case_sensitive) |
190 | { |
191 | XdgGlobHashNode *node; |
192 | xdg_unichar_t character; |
193 | |
194 | character = text[0]; |
195 | |
196 | if ((glob_hash_node == NULL) || |
197 | (character < glob_hash_node->character)) |
198 | { |
199 | node = _xdg_glob_hash_node_new (); |
200 | node->character = character; |
201 | node->next = glob_hash_node; |
202 | glob_hash_node = node; |
203 | } |
204 | else if (character == glob_hash_node->character) |
205 | { |
206 | node = glob_hash_node; |
207 | } |
208 | else |
209 | { |
210 | XdgGlobHashNode *prev_node; |
211 | int found_node = FALSE; |
212 | |
213 | /* Look for the first character of text in glob_hash_node, and insert it if we |
214 | * have to.*/ |
215 | prev_node = glob_hash_node; |
216 | node = prev_node->next; |
217 | |
218 | while (node != NULL) |
219 | { |
220 | if (character < node->character) |
221 | { |
222 | node = _xdg_glob_hash_node_new (); |
223 | node->character = character; |
224 | node->next = prev_node->next; |
225 | prev_node->next = node; |
226 | |
227 | found_node = TRUE; |
228 | break; |
229 | } |
230 | else if (character == node->character) |
231 | { |
232 | found_node = TRUE; |
233 | break; |
234 | } |
235 | prev_node = node; |
236 | node = node->next; |
237 | } |
238 | |
239 | if (! found_node) |
240 | { |
241 | node = _xdg_glob_hash_node_new (); |
242 | node->character = character; |
243 | node->next = prev_node->next; |
244 | prev_node->next = node; |
245 | } |
246 | } |
247 | |
248 | text++; |
249 | if (*text == 0) |
250 | { |
251 | if (node->mime_type) |
252 | { |
253 | if (strcmp (s1: node->mime_type, s2: mime_type) != 0) |
254 | { |
255 | XdgGlobHashNode *child; |
256 | int found_node = FALSE; |
257 | |
258 | child = node->child; |
259 | while (child && child->character == 0) |
260 | { |
261 | if (strcmp (s1: child->mime_type, s2: mime_type) == 0) |
262 | { |
263 | found_node = TRUE; |
264 | break; |
265 | } |
266 | child = child->next; |
267 | } |
268 | |
269 | if (!found_node) |
270 | { |
271 | child = _xdg_glob_hash_node_new (); |
272 | child->character = 0; |
273 | child->mime_type = strdup (s: mime_type); |
274 | child->weight = weight; |
275 | child->case_sensitive = case_sensitive; |
276 | child->child = NULL; |
277 | child->next = node->child; |
278 | node->child = child; |
279 | } |
280 | } |
281 | } |
282 | else |
283 | { |
284 | node->mime_type = strdup (s: mime_type); |
285 | node->weight = weight; |
286 | node->case_sensitive = case_sensitive; |
287 | } |
288 | } |
289 | else |
290 | { |
291 | node->child = _xdg_glob_hash_insert_ucs4 (glob_hash_node: node->child, text, mime_type, weight, case_sensitive); |
292 | } |
293 | return glob_hash_node; |
294 | } |
295 | |
296 | /* glob must be valid UTF-8 */ |
297 | static XdgGlobHashNode * |
298 | _xdg_glob_hash_insert_text (XdgGlobHashNode *glob_hash_node, |
299 | const char *text, |
300 | const char *mime_type, |
301 | int weight, |
302 | int case_sensitive) |
303 | { |
304 | XdgGlobHashNode *node; |
305 | xdg_unichar_t *unitext; |
306 | int len; |
307 | |
308 | unitext = _xdg_convert_to_ucs4 (source: text, len: &len); |
309 | _xdg_reverse_ucs4 (source: unitext, len); |
310 | node = _xdg_glob_hash_insert_ucs4 (glob_hash_node, text: unitext, mime_type, weight, case_sensitive); |
311 | free (ptr: unitext); |
312 | return node; |
313 | } |
314 | |
315 | typedef struct { |
316 | const char *mime; |
317 | int weight; |
318 | } MimeWeight; |
319 | |
320 | static int |
321 | _xdg_glob_hash_node_lookup_file_name (XdgGlobHashNode *glob_hash_node, |
322 | const char *file_name, |
323 | int len, |
324 | int case_sensitive_check, |
325 | MimeWeight mime_types[], |
326 | int n_mime_types) |
327 | { |
328 | int n; |
329 | XdgGlobHashNode *node; |
330 | xdg_unichar_t character; |
331 | |
332 | if (glob_hash_node == NULL) |
333 | return 0; |
334 | |
335 | character = file_name[len - 1]; |
336 | |
337 | for (node = glob_hash_node; node && character >= node->character; node = node->next) |
338 | { |
339 | if (character == node->character) |
340 | { |
341 | len--; |
342 | n = 0; |
343 | if (len > 0) |
344 | { |
345 | n = _xdg_glob_hash_node_lookup_file_name (glob_hash_node: node->child, |
346 | file_name, |
347 | len, |
348 | case_sensitive_check, |
349 | mime_types, |
350 | n_mime_types); |
351 | } |
352 | if (n == 0) |
353 | { |
354 | if (node->mime_type && |
355 | (case_sensitive_check || |
356 | !node->case_sensitive)) |
357 | { |
358 | mime_types[n].mime = node->mime_type; |
359 | mime_types[n].weight = node->weight; |
360 | n++; |
361 | } |
362 | node = node->child; |
363 | while (n < n_mime_types && node && node->character == 0) |
364 | { |
365 | if (node->mime_type && |
366 | (case_sensitive_check || |
367 | !node->case_sensitive)) |
368 | { |
369 | mime_types[n].mime = node->mime_type; |
370 | mime_types[n].weight = node->weight; |
371 | n++; |
372 | } |
373 | node = node->next; |
374 | } |
375 | } |
376 | return n; |
377 | } |
378 | } |
379 | |
380 | return 0; |
381 | } |
382 | |
383 | static int compare_mime_weight (const void *a, const void *b) |
384 | { |
385 | const MimeWeight *aa = (const MimeWeight *)a; |
386 | const MimeWeight *bb = (const MimeWeight *)b; |
387 | |
388 | return bb->weight - aa->weight; |
389 | } |
390 | |
391 | #define ISUPPER(c) ((c) >= 'A' && (c) <= 'Z') |
392 | static char * |
393 | ascii_tolower (const char *str) |
394 | { |
395 | char *p, *lower; |
396 | |
397 | lower = strdup (s: str); |
398 | p = lower; |
399 | while (*p != 0) |
400 | { |
401 | char c = *p; |
402 | *p++ = ISUPPER (c) ? c - 'A' + 'a' : c; |
403 | } |
404 | return lower; |
405 | } |
406 | |
407 | static int |
408 | filter_out_dupes (MimeWeight mimes[], int n_mimes) |
409 | { |
410 | int last; |
411 | int i, j; |
412 | |
413 | last = n_mimes; |
414 | |
415 | for (i = 0; i < last; i++) |
416 | { |
417 | j = i + 1; |
418 | while (j < last) |
419 | { |
420 | if (strcmp (s1: mimes[i].mime, s2: mimes[j].mime) == 0) |
421 | { |
422 | mimes[i].weight = MAX (mimes[i].weight, mimes[j].weight); |
423 | last--; |
424 | mimes[j].mime = mimes[last].mime; |
425 | mimes[j].weight = mimes[last].weight; |
426 | } |
427 | else |
428 | j++; |
429 | } |
430 | } |
431 | |
432 | return last; |
433 | } |
434 | |
435 | int |
436 | _xdg_glob_hash_lookup_file_name (XdgGlobHash *glob_hash, |
437 | const char *file_name, |
438 | const char *mime_types[], |
439 | int n_mime_types) |
440 | { |
441 | XdgGlobList *list; |
442 | int i, n; |
443 | MimeWeight mimes[10]; |
444 | int n_mimes = 10; |
445 | int len; |
446 | char *lower_case; |
447 | |
448 | /* First, check the literals */ |
449 | |
450 | assert (file_name != NULL && n_mime_types > 0); |
451 | |
452 | n = 0; |
453 | |
454 | lower_case = ascii_tolower (str: file_name); |
455 | |
456 | for (list = glob_hash->literal_list; list; list = list->next) |
457 | { |
458 | if (strcmp (s1: (const char *)list->data, s2: file_name) == 0) |
459 | { |
460 | mime_types[0] = list->mime_type; |
461 | free (ptr: lower_case); |
462 | return 1; |
463 | } |
464 | } |
465 | |
466 | for (list = glob_hash->literal_list; list; list = list->next) |
467 | { |
468 | if (!list->case_sensitive && |
469 | strcmp (s1: (const char *)list->data, s2: lower_case) == 0) |
470 | { |
471 | mime_types[0] = list->mime_type; |
472 | free (ptr: lower_case); |
473 | return 1; |
474 | } |
475 | } |
476 | |
477 | |
478 | len = strlen (s: file_name); |
479 | n = _xdg_glob_hash_node_lookup_file_name (glob_hash_node: glob_hash->simple_node, file_name: lower_case, len, FALSE, |
480 | mime_types: mimes, n_mime_types: n_mimes); |
481 | if (n < 2) |
482 | n += _xdg_glob_hash_node_lookup_file_name (glob_hash_node: glob_hash->simple_node, file_name, len, TRUE, |
483 | mime_types: mimes + n, n_mime_types: n_mimes - n); |
484 | |
485 | if (n < 2) |
486 | { |
487 | for (list = glob_hash->full_list; list && n < n_mime_types; list = list->next) |
488 | { |
489 | if (fnmatch (pattern: (const char *)list->data, name: file_name, flags: 0) == 0) |
490 | { |
491 | mimes[n].mime = list->mime_type; |
492 | mimes[n].weight = list->weight; |
493 | n++; |
494 | } |
495 | } |
496 | } |
497 | free (ptr: lower_case); |
498 | |
499 | n = filter_out_dupes (mimes, n_mimes: n); |
500 | |
501 | qsort (base: mimes, nmemb: n, size: sizeof (MimeWeight), compar: compare_mime_weight); |
502 | |
503 | if (n_mime_types < n) |
504 | n = n_mime_types; |
505 | |
506 | for (i = 0; i < n; i++) |
507 | mime_types[i] = mimes[i].mime; |
508 | |
509 | return n; |
510 | } |
511 | |
512 | |
513 | |
514 | /* XdgGlobHash |
515 | */ |
516 | |
517 | XdgGlobHash * |
518 | _xdg_glob_hash_new (void) |
519 | { |
520 | XdgGlobHash *glob_hash; |
521 | |
522 | glob_hash = calloc (nmemb: 1, size: sizeof (XdgGlobHash)); |
523 | |
524 | return glob_hash; |
525 | } |
526 | |
527 | |
528 | static void |
529 | _xdg_glob_hash_free_nodes (XdgGlobHashNode *node) |
530 | { |
531 | if (node) |
532 | { |
533 | if (node->child) |
534 | _xdg_glob_hash_free_nodes (node: node->child); |
535 | if (node->next) |
536 | _xdg_glob_hash_free_nodes (node: node->next); |
537 | if (node->mime_type) |
538 | free (ptr: (void *) node->mime_type); |
539 | free (ptr: node); |
540 | } |
541 | } |
542 | |
543 | void |
544 | _xdg_glob_hash_free (XdgGlobHash *glob_hash) |
545 | { |
546 | _xdg_glob_list_free (glob_list: glob_hash->literal_list); |
547 | _xdg_glob_list_free (glob_list: glob_hash->full_list); |
548 | _xdg_glob_hash_free_nodes (node: glob_hash->simple_node); |
549 | free (ptr: glob_hash); |
550 | } |
551 | |
552 | XdgGlobType |
553 | _xdg_glob_determine_type (const char *glob) |
554 | { |
555 | const char *ptr; |
556 | int maybe_in_simple_glob = FALSE; |
557 | int first_char = TRUE; |
558 | |
559 | ptr = glob; |
560 | |
561 | while (*ptr != '\0') |
562 | { |
563 | if (*ptr == '*' && first_char) |
564 | maybe_in_simple_glob = TRUE; |
565 | else if (*ptr == '\\' || *ptr == '[' || *ptr == '?' || *ptr == '*') |
566 | return XDG_GLOB_FULL; |
567 | |
568 | first_char = FALSE; |
569 | ptr = _xdg_utf8_next_char (ptr); |
570 | } |
571 | if (maybe_in_simple_glob) |
572 | return XDG_GLOB_SIMPLE; |
573 | else |
574 | return XDG_GLOB_LITERAL; |
575 | } |
576 | |
577 | /* glob must be valid UTF-8 */ |
578 | void |
579 | _xdg_glob_hash_append_glob (XdgGlobHash *glob_hash, |
580 | const char *glob, |
581 | const char *mime_type, |
582 | int weight, |
583 | int case_sensitive) |
584 | { |
585 | XdgGlobType type; |
586 | |
587 | assert (glob_hash != NULL); |
588 | assert (glob != NULL); |
589 | |
590 | type = _xdg_glob_determine_type (glob); |
591 | |
592 | switch (type) |
593 | { |
594 | case XDG_GLOB_LITERAL: |
595 | glob_hash->literal_list = _xdg_glob_list_append (glob_list: glob_hash->literal_list, data: strdup (s: glob), mime_type: strdup (s: mime_type), weight, case_sensitive); |
596 | break; |
597 | case XDG_GLOB_SIMPLE: |
598 | glob_hash->simple_node = _xdg_glob_hash_insert_text (glob_hash_node: glob_hash->simple_node, text: glob + 1, mime_type, weight, case_sensitive); |
599 | break; |
600 | case XDG_GLOB_FULL: |
601 | glob_hash->full_list = _xdg_glob_list_append (glob_list: glob_hash->full_list, data: strdup (s: glob), mime_type: strdup (s: mime_type), weight, case_sensitive); |
602 | break; |
603 | } |
604 | } |
605 | |
606 | #ifdef NOT_USED_IN_GIO |
607 | |
608 | void |
609 | _xdg_glob_hash_dump (XdgGlobHash *glob_hash) |
610 | { |
611 | XdgGlobList *list; |
612 | printf ("LITERAL STRINGS\n" ); |
613 | if (!glob_hash || glob_hash->literal_list == NULL) |
614 | { |
615 | printf (" None\n" ); |
616 | } |
617 | else |
618 | { |
619 | for (list = glob_hash->literal_list; list; list = list->next) |
620 | printf (" %s - %s %d\n" , (char *)list->data, list->mime_type, list->weight); |
621 | } |
622 | printf ("\nSIMPLE GLOBS\n" ); |
623 | if (!glob_hash || glob_hash->simple_node == NULL) |
624 | { |
625 | printf (" None\n" ); |
626 | } |
627 | else |
628 | { |
629 | _xdg_glob_hash_node_dump (glob_hash->simple_node, 4); |
630 | } |
631 | |
632 | printf ("\nFULL GLOBS\n" ); |
633 | if (!glob_hash || glob_hash->full_list == NULL) |
634 | { |
635 | printf (" None\n" ); |
636 | } |
637 | else |
638 | { |
639 | for (list = glob_hash->full_list; list; list = list->next) |
640 | printf (" %s - %s %d\n" , (char *)list->data, list->mime_type, list->weight); |
641 | } |
642 | } |
643 | |
644 | #endif |
645 | |
646 | void |
647 | _xdg_mime_glob_read_from_file (XdgGlobHash *glob_hash, |
648 | const char *file_name, |
649 | int version_two) |
650 | { |
651 | FILE *glob_file; |
652 | char line[255]; |
653 | char *p; |
654 | |
655 | glob_file = fopen (filename: file_name, modes: "r" ); |
656 | |
657 | if (glob_file == NULL) |
658 | return; |
659 | |
660 | /* FIXME: Not UTF-8 safe. Doesn't work if lines are greater than 255 chars. |
661 | * Blah */ |
662 | while (fgets (s: line, n: 255, stream: glob_file) != NULL) |
663 | { |
664 | char *colon; |
665 | char *mimetype, *glob, *end; |
666 | int weight; |
667 | int case_sensitive; |
668 | |
669 | if (line[0] == '#' || line[0] == 0) |
670 | continue; |
671 | |
672 | end = line + strlen(s: line) - 1; |
673 | if (*end == '\n') |
674 | *end = 0; |
675 | |
676 | p = line; |
677 | if (version_two) |
678 | { |
679 | colon = strchr (s: p, c: ':'); |
680 | if (colon == NULL) |
681 | continue; |
682 | *colon = 0; |
683 | weight = atoi (nptr: p); |
684 | p = colon + 1; |
685 | } |
686 | else |
687 | weight = 50; |
688 | |
689 | colon = strchr (s: p, c: ':'); |
690 | if (colon == NULL) |
691 | continue; |
692 | *colon = 0; |
693 | |
694 | mimetype = p; |
695 | p = colon + 1; |
696 | glob = p; |
697 | case_sensitive = FALSE; |
698 | |
699 | colon = strchr (s: p, c: ':'); |
700 | if (version_two && colon != NULL) |
701 | { |
702 | char *flag; |
703 | |
704 | /* We got flags */ |
705 | *colon = 0; |
706 | p = colon + 1; |
707 | |
708 | /* Flags end at next colon */ |
709 | colon = strchr (s: p, c: ':'); |
710 | if (colon != NULL) |
711 | *colon = 0; |
712 | |
713 | flag = strstr (haystack: p, needle: "cs" ); |
714 | if (flag != NULL && |
715 | /* Start or after comma */ |
716 | (flag == p || |
717 | flag[-1] == ',') && |
718 | /* ends with comma or end of string */ |
719 | (flag[2] == 0 || |
720 | flag[2] == ',')) |
721 | case_sensitive = TRUE; |
722 | } |
723 | |
724 | _xdg_glob_hash_append_glob (glob_hash, glob, mime_type: mimetype, weight, case_sensitive); |
725 | } |
726 | |
727 | fclose (stream: glob_file); |
728 | } |
729 | |