1 | /* GLIB - Library of useful routines for C programming |
2 | * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald |
3 | * |
4 | * GScanner: Flexible lexical scanner for general purpose. |
5 | * Copyright (C) 1997, 1998 Tim Janik |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | */ |
20 | |
21 | /* |
22 | * Modified by the GLib Team and others 1997-2000. See the AUTHORS |
23 | * file for a list of people on the GLib Team. See the ChangeLog |
24 | * files for a list of changes. These files are distributed with |
25 | * GLib at ftp://ftp.gtk.org/pub/gtk/. |
26 | */ |
27 | |
28 | /* |
29 | * MT safe |
30 | */ |
31 | |
32 | #include "config.h" |
33 | |
34 | #include <errno.h> |
35 | #include <stdlib.h> |
36 | #include <stdarg.h> |
37 | #include <string.h> |
38 | #include <stdio.h> |
39 | |
40 | #include "gscanner.h" |
41 | |
42 | #include "gprintfint.h" |
43 | #include "gstrfuncs.h" |
44 | #include "gstring.h" |
45 | #include "gtestutils.h" |
46 | |
47 | #ifdef G_OS_UNIX |
48 | #include <unistd.h> |
49 | #endif |
50 | #ifdef G_OS_WIN32 |
51 | #include <io.h> |
52 | #endif |
53 | |
54 | |
55 | /** |
56 | * SECTION:scanner |
57 | * @title: Lexical Scanner |
58 | * @short_description: a general purpose lexical scanner |
59 | * |
60 | * The #GScanner and its associated functions provide a |
61 | * general purpose lexical scanner. |
62 | */ |
63 | |
64 | /** |
65 | * GScannerMsgFunc: |
66 | * @scanner: a #GScanner |
67 | * @message: the message |
68 | * @error: %TRUE if the message signals an error, |
69 | * %FALSE if it signals a warning. |
70 | * |
71 | * Specifies the type of the message handler function. |
72 | */ |
73 | |
74 | /** |
75 | * G_CSET_a_2_z: |
76 | * |
77 | * The set of lowercase ASCII alphabet characters. |
78 | * Used for specifying valid identifier characters |
79 | * in #GScannerConfig. |
80 | */ |
81 | |
82 | /** |
83 | * G_CSET_A_2_Z: |
84 | * |
85 | * The set of uppercase ASCII alphabet characters. |
86 | * Used for specifying valid identifier characters |
87 | * in #GScannerConfig. |
88 | */ |
89 | |
90 | /** |
91 | * G_CSET_DIGITS: |
92 | * |
93 | * The set of ASCII digits. |
94 | * Used for specifying valid identifier characters |
95 | * in #GScannerConfig. |
96 | */ |
97 | |
98 | /** |
99 | * G_CSET_LATINC: |
100 | * |
101 | * The set of uppercase ISO 8859-1 alphabet characters |
102 | * which are not ASCII characters. |
103 | * Used for specifying valid identifier characters |
104 | * in #GScannerConfig. |
105 | */ |
106 | |
107 | /** |
108 | * G_CSET_LATINS: |
109 | * |
110 | * The set of lowercase ISO 8859-1 alphabet characters |
111 | * which are not ASCII characters. |
112 | * Used for specifying valid identifier characters |
113 | * in #GScannerConfig. |
114 | */ |
115 | |
116 | /** |
117 | * GTokenType: |
118 | * @G_TOKEN_EOF: the end of the file |
119 | * @G_TOKEN_LEFT_PAREN: a '(' character |
120 | * @G_TOKEN_LEFT_CURLY: a '{' character |
121 | * @G_TOKEN_LEFT_BRACE: a '[' character |
122 | * @G_TOKEN_RIGHT_CURLY: a '}' character |
123 | * @G_TOKEN_RIGHT_PAREN: a ')' character |
124 | * @G_TOKEN_RIGHT_BRACE: a ']' character |
125 | * @G_TOKEN_EQUAL_SIGN: a '=' character |
126 | * @G_TOKEN_COMMA: a ',' character |
127 | * @G_TOKEN_NONE: not a token |
128 | * @G_TOKEN_ERROR: an error occurred |
129 | * @G_TOKEN_CHAR: a character |
130 | * @G_TOKEN_BINARY: a binary integer |
131 | * @G_TOKEN_OCTAL: an octal integer |
132 | * @G_TOKEN_INT: an integer |
133 | * @G_TOKEN_HEX: a hex integer |
134 | * @G_TOKEN_FLOAT: a floating point number |
135 | * @G_TOKEN_STRING: a string |
136 | * @G_TOKEN_SYMBOL: a symbol |
137 | * @G_TOKEN_IDENTIFIER: an identifier |
138 | * @G_TOKEN_IDENTIFIER_NULL: a null identifier |
139 | * @G_TOKEN_COMMENT_SINGLE: one line comment |
140 | * @G_TOKEN_COMMENT_MULTI: multi line comment |
141 | * |
142 | * The possible types of token returned from each |
143 | * g_scanner_get_next_token() call. |
144 | */ |
145 | |
146 | /** |
147 | * GTokenValue: |
148 | * @v_symbol: token symbol value |
149 | * @v_identifier: token identifier value |
150 | * @v_binary: token binary integer value |
151 | * @v_octal: octal integer value |
152 | * @v_int: integer value |
153 | * @v_int64: 64-bit integer value |
154 | * @v_float: floating point value |
155 | * @v_hex: hex integer value |
156 | * @v_string: string value |
157 | * @v_comment: comment value |
158 | * @v_char: character value |
159 | * @v_error: error value |
160 | * |
161 | * A union holding the value of the token. |
162 | */ |
163 | |
164 | /** |
165 | * GErrorType: |
166 | * @G_ERR_UNKNOWN: unknown error |
167 | * @G_ERR_UNEXP_EOF: unexpected end of file |
168 | * @G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant |
169 | * @G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment |
170 | * @G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number |
171 | * @G_ERR_DIGIT_RADIX: digit beyond radix in a number |
172 | * @G_ERR_FLOAT_RADIX: non-decimal floating point number |
173 | * @G_ERR_FLOAT_MALFORMED: malformed floating point number |
174 | * |
175 | * The possible errors, used in the @v_error field |
176 | * of #GTokenValue, when the token is a %G_TOKEN_ERROR. |
177 | */ |
178 | |
179 | /** |
180 | * GScanner: |
181 | * @user_data: unused |
182 | * @max_parse_errors: unused |
183 | * @parse_errors: g_scanner_error() increments this field |
184 | * @input_name: name of input stream, featured by the default message handler |
185 | * @qdata: quarked data |
186 | * @config: link into the scanner configuration |
187 | * @token: token parsed by the last g_scanner_get_next_token() |
188 | * @value: value of the last token from g_scanner_get_next_token() |
189 | * @line: line number of the last token from g_scanner_get_next_token() |
190 | * @position: char number of the last token from g_scanner_get_next_token() |
191 | * @next_token: token parsed by the last g_scanner_peek_next_token() |
192 | * @next_value: value of the last token from g_scanner_peek_next_token() |
193 | * @next_line: line number of the last token from g_scanner_peek_next_token() |
194 | * @next_position: char number of the last token from g_scanner_peek_next_token() |
195 | * @msg_handler: handler function for _warn and _error |
196 | * |
197 | * The data structure representing a lexical scanner. |
198 | * |
199 | * You should set @input_name after creating the scanner, since |
200 | * it is used by the default message handler when displaying |
201 | * warnings and errors. If you are scanning a file, the filename |
202 | * would be a good choice. |
203 | * |
204 | * The @user_data and @max_parse_errors fields are not used. |
205 | * If you need to associate extra data with the scanner you |
206 | * can place them here. |
207 | * |
208 | * If you want to use your own message handler you can set the |
209 | * @msg_handler field. The type of the message handler function |
210 | * is declared by #GScannerMsgFunc. |
211 | */ |
212 | |
213 | /** |
214 | * GScannerConfig: |
215 | * @cset_skip_characters: specifies which characters should be skipped |
216 | * by the scanner (the default is the whitespace characters: space, |
217 | * tab, carriage-return and line-feed). |
218 | * @cset_identifier_first: specifies the characters which can start |
219 | * identifiers (the default is #G_CSET_a_2_z, "_", and #G_CSET_A_2_Z). |
220 | * @cset_identifier_nth: specifies the characters which can be used |
221 | * in identifiers, after the first character (the default is |
222 | * #G_CSET_a_2_z, "_0123456789", #G_CSET_A_2_Z, #G_CSET_LATINS, |
223 | * #G_CSET_LATINC). |
224 | * @cpair_comment_single: specifies the characters at the start and |
225 | * end of single-line comments. The default is "#\n" which means |
226 | * that single-line comments start with a '#' and continue until |
227 | * a '\n' (end of line). |
228 | * @case_sensitive: specifies if symbols are case sensitive (the |
229 | * default is %FALSE). |
230 | * @skip_comment_multi: specifies if multi-line comments are skipped |
231 | * and not returned as tokens (the default is %TRUE). |
232 | * @skip_comment_single: specifies if single-line comments are skipped |
233 | * and not returned as tokens (the default is %TRUE). |
234 | * @scan_comment_multi: specifies if multi-line comments are recognized |
235 | * (the default is %TRUE). |
236 | * @scan_identifier: specifies if identifiers are recognized (the |
237 | * default is %TRUE). |
238 | * @scan_identifier_1char: specifies if single-character |
239 | * identifiers are recognized (the default is %FALSE). |
240 | * @scan_identifier_NULL: specifies if %NULL is reported as |
241 | * %G_TOKEN_IDENTIFIER_NULL (the default is %FALSE). |
242 | * @scan_symbols: specifies if symbols are recognized (the default |
243 | * is %TRUE). |
244 | * @scan_binary: specifies if binary numbers are recognized (the |
245 | * default is %FALSE). |
246 | * @scan_octal: specifies if octal numbers are recognized (the |
247 | * default is %TRUE). |
248 | * @scan_float: specifies if floating point numbers are recognized |
249 | * (the default is %TRUE). |
250 | * @scan_hex: specifies if hexadecimal numbers are recognized (the |
251 | * default is %TRUE). |
252 | * @scan_hex_dollar: specifies if '$' is recognized as a prefix for |
253 | * hexadecimal numbers (the default is %FALSE). |
254 | * @scan_string_sq: specifies if strings can be enclosed in single |
255 | * quotes (the default is %TRUE). |
256 | * @scan_string_dq: specifies if strings can be enclosed in double |
257 | * quotes (the default is %TRUE). |
 * @numbers_2_int: specifies if binary, octal and hexadecimal numbers
 *     are reported as %G_TOKEN_INT (the default is %TRUE).
260 | * @int_2_float: specifies if all numbers are reported as %G_TOKEN_FLOAT |
261 | * (the default is %FALSE). |
262 | * @identifier_2_string: specifies if identifiers are reported as strings |
263 | * (the default is %FALSE). |
264 | * @char_2_token: specifies if characters are reported by setting |
265 | * `token = ch` or as %G_TOKEN_CHAR (the default is %TRUE). |
266 | * @symbol_2_token: specifies if symbols are reported by setting |
267 | * `token = v_symbol` or as %G_TOKEN_SYMBOL (the default is %FALSE). |
268 | * @scope_0_fallback: specifies if a symbol is searched for in the |
269 | * default scope in addition to the current scope (the default is %FALSE). |
270 | * @store_int64: use value.v_int64 rather than v_int |
271 | * |
272 | * Specifies the #GScanner parser configuration. Most settings can |
273 | * be changed during the parsing phase and will affect the lexical |
274 | * parsing of the next unpeeked token. |
275 | */ |
276 | |
277 | /* --- defines --- */ |
/* to_lower(c): locale-independent lower-casing of a single byte.
 *
 * The argument is cast to guchar and then OR-ed with a case offset that is
 * non-zero only when the byte lies in one of three ranges:
 *   'A'..'Z'   -> offset 'a'-'A'
 *   192..214   -> offset 224-192
 *   216..222   -> offset 248-216
 * (the two high ranges correspond to the upper-case letters of ISO 8859-1).
 * Each comparison yields 0 or 1, so multiplying by the offset selects it
 * without branching. Bytes outside these ranges are returned unchanged.
 *
 * NOTE: the argument is evaluated several times, so it must not have side
 * effects.
 */
#define to_lower(c)                             ( \
        (guchar) (                                                      \
          ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) |  \
          ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) |  \
          ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) |  \
          ((guchar)(c))                                                 \
        )                                                               \
)
/* Not referenced in this part of the file; presumably the size of the
 * buffer used when reading input from a file descriptor -- TODO confirm
 * at the use site.
 */
#define READ_BUFFER_SIZE        (4000)
287 | |
288 | |
289 | /* --- typedefs --- */ |
/* Entry stored in the scanner's symbol table. The hash and equality
 * functions of the table consider only scope_id and symbol.
 */
typedef struct _GScannerKey     GScannerKey;

struct _GScannerKey
{
  guint          scope_id;  /* scope the symbol is bound in */
  gchar         *symbol;    /* heap-allocated symbol name; lower-cased
                             * when the scanner is case-insensitive */
  gpointer       value;     /* caller-supplied value returned by lookups */
};
298 | |
299 | |
300 | /* --- variables --- */ |
/* Default scanner configuration; g_scanner_new() falls back to it when the
 * caller passes a NULL template. The initializers are positional, so each
 * one is followed by a comment naming the GScannerConfig field it sets.
 */
static const GScannerConfig g_scanner_config_template =
{
  (
   " \t\r\n"
   )                    /* cset_skip_characters */,
  (
   G_CSET_a_2_z
   "_"
   G_CSET_A_2_Z
   )                    /* cset_identifier_first */,
  (
   G_CSET_a_2_z
   "_"
   G_CSET_A_2_Z
   G_CSET_DIGITS
   G_CSET_LATINS
   G_CSET_LATINC
   )                    /* cset_identifier_nth */,
  ( "#\n" )             /* cpair_comment_single */,

  FALSE                 /* case_sensitive */,

  TRUE                  /* skip_comment_multi */,
  TRUE                  /* skip_comment_single */,
  TRUE                  /* scan_comment_multi */,
  TRUE                  /* scan_identifier */,
  FALSE                 /* scan_identifier_1char */,
  FALSE                 /* scan_identifier_NULL */,
  TRUE                  /* scan_symbols */,
  FALSE                 /* scan_binary */,
  TRUE                  /* scan_octal */,
  TRUE                  /* scan_float */,
  TRUE                  /* scan_hex */,
  FALSE                 /* scan_hex_dollar */,
  TRUE                  /* scan_string_sq */,
  TRUE                  /* scan_string_dq */,
  TRUE                  /* numbers_2_int */,
  FALSE                 /* int_2_float */,
  FALSE                 /* identifier_2_string */,
  TRUE                  /* char_2_token */,
  FALSE                 /* symbol_2_token */,
  FALSE                 /* scope_0_fallback */,
  FALSE                 /* store_int64 */,
  0                     /* padding_dummy */
};
346 | |
347 | |
348 | /* --- prototypes --- */ |
/* Forward declarations of file-local helpers, so that they can be
 * referenced before their definitions.
 */

/* symbol table: lookup plus the hash/equality callbacks of the table */
static inline
GScannerKey*    g_scanner_lookup_internal (GScanner     *scanner,
                                           guint         scope_id,
                                           const gchar  *symbol);
static gboolean g_scanner_key_equal       (gconstpointer v1,
                                           gconstpointer v2);
static guint    g_scanner_key_hash        (gconstpointer v);

/* tokenizer entry points; each writes the token type, value, line and
 * position through the supplied pointers */
static void     g_scanner_get_token_ll    (GScanner     *scanner,
                                           GTokenType   *token_p,
                                           GTokenValue  *value_p,
                                           guint        *line_p,
                                           guint        *position_p);
static void     g_scanner_get_token_i     (GScanner     *scanner,
                                           GTokenType   *token_p,
                                           GTokenValue  *value_p,
                                           guint        *line_p,
                                           guint        *position_p);

/* character-level input */
static guchar   g_scanner_peek_next_char  (GScanner     *scanner);
static guchar   g_scanner_get_char        (GScanner     *scanner,
                                           guint        *line_p,
                                           guint        *position_p);

/* default message handler installed by g_scanner_new() */
static void     g_scanner_msg_handler     (GScanner     *scanner,
                                           gchar        *message,
                                           gboolean      is_error);
374 | |
375 | |
376 | /* --- functions --- */ |
377 | static inline gint |
378 | g_scanner_char_2_num (guchar c, |
379 | guchar base) |
380 | { |
381 | if (c >= '0' && c <= '9') |
382 | c -= '0'; |
383 | else if (c >= 'A' && c <= 'Z') |
384 | c -= 'A' - 10; |
385 | else if (c >= 'a' && c <= 'z') |
386 | c -= 'a' - 10; |
387 | else |
388 | return -1; |
389 | |
390 | if (c < base) |
391 | return c; |
392 | |
393 | return -1; |
394 | } |
395 | |
396 | /** |
397 | * g_scanner_new: |
398 | * @config_templ: the initial scanner settings |
399 | * |
400 | * Creates a new #GScanner. |
401 | * |
402 | * The @config_templ structure specifies the initial settings |
403 | * of the scanner, which are copied into the #GScanner |
404 | * @config field. If you pass %NULL then the default settings |
405 | * are used. |
406 | * |
407 | * Returns: the new #GScanner |
408 | */ |
409 | GScanner * |
410 | g_scanner_new (const GScannerConfig *config_templ) |
411 | { |
412 | GScanner *scanner; |
413 | |
414 | if (!config_templ) |
415 | config_templ = &g_scanner_config_template; |
416 | |
417 | scanner = g_new0 (GScanner, 1); |
418 | |
419 | scanner->user_data = NULL; |
420 | scanner->max_parse_errors = 1; |
421 | scanner->parse_errors = 0; |
422 | scanner->input_name = NULL; |
423 | g_datalist_init (datalist: &scanner->qdata); |
424 | |
425 | scanner->config = g_new0 (GScannerConfig, 1); |
426 | |
427 | scanner->config->case_sensitive = config_templ->case_sensitive; |
428 | scanner->config->cset_skip_characters = config_templ->cset_skip_characters; |
429 | if (!scanner->config->cset_skip_characters) |
430 | scanner->config->cset_skip_characters = "" ; |
431 | scanner->config->cset_identifier_first = config_templ->cset_identifier_first; |
432 | scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth; |
433 | scanner->config->cpair_comment_single = config_templ->cpair_comment_single; |
434 | scanner->config->skip_comment_multi = config_templ->skip_comment_multi; |
435 | scanner->config->skip_comment_single = config_templ->skip_comment_single; |
436 | scanner->config->scan_comment_multi = config_templ->scan_comment_multi; |
437 | scanner->config->scan_identifier = config_templ->scan_identifier; |
438 | scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char; |
439 | scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL; |
440 | scanner->config->scan_symbols = config_templ->scan_symbols; |
441 | scanner->config->scan_binary = config_templ->scan_binary; |
442 | scanner->config->scan_octal = config_templ->scan_octal; |
443 | scanner->config->scan_float = config_templ->scan_float; |
444 | scanner->config->scan_hex = config_templ->scan_hex; |
445 | scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar; |
446 | scanner->config->scan_string_sq = config_templ->scan_string_sq; |
447 | scanner->config->scan_string_dq = config_templ->scan_string_dq; |
448 | scanner->config->numbers_2_int = config_templ->numbers_2_int; |
449 | scanner->config->int_2_float = config_templ->int_2_float; |
450 | scanner->config->identifier_2_string = config_templ->identifier_2_string; |
451 | scanner->config->char_2_token = config_templ->char_2_token; |
452 | scanner->config->symbol_2_token = config_templ->symbol_2_token; |
453 | scanner->config->scope_0_fallback = config_templ->scope_0_fallback; |
454 | scanner->config->store_int64 = config_templ->store_int64; |
455 | |
456 | scanner->token = G_TOKEN_NONE; |
457 | scanner->value.v_int64 = 0; |
458 | scanner->line = 1; |
459 | scanner->position = 0; |
460 | |
461 | scanner->next_token = G_TOKEN_NONE; |
462 | scanner->next_value.v_int64 = 0; |
463 | scanner->next_line = 1; |
464 | scanner->next_position = 0; |
465 | |
466 | scanner->symbol_table = g_hash_table_new (hash_func: g_scanner_key_hash, key_equal_func: g_scanner_key_equal); |
467 | scanner->input_fd = -1; |
468 | scanner->text = NULL; |
469 | scanner->text_end = NULL; |
470 | scanner->buffer = NULL; |
471 | scanner->scope_id = 0; |
472 | |
473 | scanner->msg_handler = g_scanner_msg_handler; |
474 | |
475 | return scanner; |
476 | } |
477 | |
478 | static inline void |
479 | g_scanner_free_value (GTokenType *token_p, |
480 | GTokenValue *value_p) |
481 | { |
482 | switch (*token_p) |
483 | { |
484 | case G_TOKEN_STRING: |
485 | case G_TOKEN_IDENTIFIER: |
486 | case G_TOKEN_IDENTIFIER_NULL: |
487 | case G_TOKEN_COMMENT_SINGLE: |
488 | case G_TOKEN_COMMENT_MULTI: |
489 | g_free (mem: value_p->v_string); |
490 | break; |
491 | |
492 | default: |
493 | break; |
494 | } |
495 | |
496 | *token_p = G_TOKEN_NONE; |
497 | } |
498 | |
499 | static void |
500 | g_scanner_destroy_symbol_table_entry (gpointer _key, |
501 | gpointer _value, |
502 | gpointer _data) |
503 | { |
504 | GScannerKey *key = _key; |
505 | |
506 | g_free (mem: key->symbol); |
507 | g_free (mem: key); |
508 | } |
509 | |
510 | /** |
511 | * g_scanner_destroy: |
512 | * @scanner: a #GScanner |
513 | * |
514 | * Frees all memory used by the #GScanner. |
515 | */ |
516 | void |
517 | g_scanner_destroy (GScanner *scanner) |
518 | { |
519 | g_return_if_fail (scanner != NULL); |
520 | |
521 | g_datalist_clear (datalist: &scanner->qdata); |
522 | g_hash_table_foreach (hash_table: scanner->symbol_table, |
523 | func: g_scanner_destroy_symbol_table_entry, NULL); |
524 | g_hash_table_destroy (hash_table: scanner->symbol_table); |
525 | g_scanner_free_value (token_p: &scanner->token, value_p: &scanner->value); |
526 | g_scanner_free_value (token_p: &scanner->next_token, value_p: &scanner->next_value); |
527 | g_free (mem: scanner->config); |
528 | g_free (mem: scanner->buffer); |
529 | g_free (mem: scanner); |
530 | } |
531 | |
532 | static void |
533 | g_scanner_msg_handler (GScanner *scanner, |
534 | gchar *message, |
535 | gboolean is_error) |
536 | { |
537 | g_return_if_fail (scanner != NULL); |
538 | |
539 | _g_fprintf (stderr, format: "%s:%d: " , |
540 | scanner->input_name ? scanner->input_name : "<memory>" , |
541 | scanner->line); |
542 | if (is_error) |
543 | _g_fprintf (stderr, format: "error: " ); |
544 | _g_fprintf (stderr, format: "%s\n" , message); |
545 | } |
546 | |
547 | /** |
548 | * g_scanner_error: |
549 | * @scanner: a #GScanner |
550 | * @format: the message format. See the printf() documentation |
551 | * @...: the parameters to insert into the format string |
552 | * |
553 | * Outputs an error message, via the #GScanner message handler. |
554 | */ |
555 | void |
556 | g_scanner_error (GScanner *scanner, |
557 | const gchar *format, |
558 | ...) |
559 | { |
560 | g_return_if_fail (scanner != NULL); |
561 | g_return_if_fail (format != NULL); |
562 | |
563 | scanner->parse_errors++; |
564 | |
565 | if (scanner->msg_handler) |
566 | { |
567 | va_list args; |
568 | gchar *string; |
569 | |
570 | va_start (args, format); |
571 | string = g_strdup_vprintf (format, args); |
572 | va_end (args); |
573 | |
574 | scanner->msg_handler (scanner, string, TRUE); |
575 | |
576 | g_free (mem: string); |
577 | } |
578 | } |
579 | |
580 | /** |
581 | * g_scanner_warn: |
582 | * @scanner: a #GScanner |
583 | * @format: the message format. See the printf() documentation |
584 | * @...: the parameters to insert into the format string |
585 | * |
586 | * Outputs a warning message, via the #GScanner message handler. |
587 | */ |
588 | void |
589 | g_scanner_warn (GScanner *scanner, |
590 | const gchar *format, |
591 | ...) |
592 | { |
593 | g_return_if_fail (scanner != NULL); |
594 | g_return_if_fail (format != NULL); |
595 | |
596 | if (scanner->msg_handler) |
597 | { |
598 | va_list args; |
599 | gchar *string; |
600 | |
601 | va_start (args, format); |
602 | string = g_strdup_vprintf (format, args); |
603 | va_end (args); |
604 | |
605 | scanner->msg_handler (scanner, string, FALSE); |
606 | |
607 | g_free (mem: string); |
608 | } |
609 | } |
610 | |
611 | static gboolean |
612 | g_scanner_key_equal (gconstpointer v1, |
613 | gconstpointer v2) |
614 | { |
615 | const GScannerKey *key1 = v1; |
616 | const GScannerKey *key2 = v2; |
617 | |
618 | return (key1->scope_id == key2->scope_id) && (strcmp (s1: key1->symbol, s2: key2->symbol) == 0); |
619 | } |
620 | |
621 | static guint |
622 | g_scanner_key_hash (gconstpointer v) |
623 | { |
624 | const GScannerKey *key = v; |
625 | gchar *c; |
626 | guint h; |
627 | |
628 | h = key->scope_id; |
629 | for (c = key->symbol; *c; c++) |
630 | h = (h << 5) - h + *c; |
631 | |
632 | return h; |
633 | } |
634 | |
635 | static inline GScannerKey* |
636 | g_scanner_lookup_internal (GScanner *scanner, |
637 | guint scope_id, |
638 | const gchar *symbol) |
639 | { |
640 | GScannerKey *key_p; |
641 | GScannerKey key; |
642 | |
643 | key.scope_id = scope_id; |
644 | |
645 | if (!scanner->config->case_sensitive) |
646 | { |
647 | gchar *d; |
648 | const gchar *c; |
649 | |
650 | key.symbol = g_new (gchar, strlen (symbol) + 1); |
651 | for (d = key.symbol, c = symbol; *c; c++, d++) |
652 | *d = to_lower (*c); |
653 | *d = 0; |
654 | key_p = g_hash_table_lookup (hash_table: scanner->symbol_table, key: &key); |
655 | g_free (mem: key.symbol); |
656 | } |
657 | else |
658 | { |
659 | key.symbol = (gchar*) symbol; |
660 | key_p = g_hash_table_lookup (hash_table: scanner->symbol_table, key: &key); |
661 | } |
662 | |
663 | return key_p; |
664 | } |
665 | |
666 | /** |
667 | * g_scanner_add_symbol: |
668 | * @scanner: a #GScanner |
669 | * @symbol: the symbol to add |
670 | * @value: the value of the symbol |
671 | * |
672 | * Adds a symbol to the default scope. |
673 | * |
674 | * Deprecated: 2.2: Use g_scanner_scope_add_symbol() instead. |
675 | */ |
676 | |
677 | /** |
678 | * g_scanner_scope_add_symbol: |
679 | * @scanner: a #GScanner |
680 | * @scope_id: the scope id |
681 | * @symbol: the symbol to add |
682 | * @value: the value of the symbol |
683 | * |
684 | * Adds a symbol to the given scope. |
685 | */ |
686 | void |
687 | g_scanner_scope_add_symbol (GScanner *scanner, |
688 | guint scope_id, |
689 | const gchar *symbol, |
690 | gpointer value) |
691 | { |
692 | GScannerKey *key; |
693 | |
694 | g_return_if_fail (scanner != NULL); |
695 | g_return_if_fail (symbol != NULL); |
696 | |
697 | key = g_scanner_lookup_internal (scanner, scope_id, symbol); |
698 | |
699 | if (!key) |
700 | { |
701 | key = g_new (GScannerKey, 1); |
702 | key->scope_id = scope_id; |
703 | key->symbol = g_strdup (str: symbol); |
704 | key->value = value; |
705 | if (!scanner->config->case_sensitive) |
706 | { |
707 | gchar *c; |
708 | |
709 | c = key->symbol; |
710 | while (*c != 0) |
711 | { |
712 | *c = to_lower (*c); |
713 | c++; |
714 | } |
715 | } |
716 | g_hash_table_add (hash_table: scanner->symbol_table, key); |
717 | } |
718 | else |
719 | key->value = value; |
720 | } |
721 | |
722 | /** |
723 | * g_scanner_remove_symbol: |
724 | * @scanner: a #GScanner |
725 | * @symbol: the symbol to remove |
726 | * |
727 | * Removes a symbol from the default scope. |
728 | * |
729 | * Deprecated: 2.2: Use g_scanner_scope_remove_symbol() instead. |
730 | */ |
731 | |
732 | /** |
733 | * g_scanner_scope_remove_symbol: |
734 | * @scanner: a #GScanner |
735 | * @scope_id: the scope id |
736 | * @symbol: the symbol to remove |
737 | * |
738 | * Removes a symbol from a scope. |
739 | */ |
740 | void |
741 | g_scanner_scope_remove_symbol (GScanner *scanner, |
742 | guint scope_id, |
743 | const gchar *symbol) |
744 | { |
745 | GScannerKey *key; |
746 | |
747 | g_return_if_fail (scanner != NULL); |
748 | g_return_if_fail (symbol != NULL); |
749 | |
750 | key = g_scanner_lookup_internal (scanner, scope_id, symbol); |
751 | |
752 | if (key) |
753 | { |
754 | g_hash_table_remove (hash_table: scanner->symbol_table, key); |
755 | g_free (mem: key->symbol); |
756 | g_free (mem: key); |
757 | } |
758 | } |
759 | |
760 | /** |
761 | * g_scanner_freeze_symbol_table: |
762 | * @scanner: a #GScanner |
763 | * |
764 | * There is no reason to use this macro, since it does nothing. |
765 | * |
766 | * Deprecated: 2.2: This macro does nothing. |
767 | */ |
768 | |
769 | /** |
770 | * g_scanner_thaw_symbol_table: |
771 | * @scanner: a #GScanner |
772 | * |
773 | * There is no reason to use this macro, since it does nothing. |
774 | * |
775 | * Deprecated: 2.2: This macro does nothing. |
776 | */ |
777 | |
778 | /** |
779 | * g_scanner_lookup_symbol: |
780 | * @scanner: a #GScanner |
781 | * @symbol: the symbol to look up |
782 | * |
783 | * Looks up a symbol in the current scope and return its value. |
784 | * If the symbol is not bound in the current scope, %NULL is |
785 | * returned. |
786 | * |
787 | * Returns: the value of @symbol in the current scope, or %NULL |
788 | * if @symbol is not bound in the current scope |
789 | */ |
790 | gpointer |
791 | g_scanner_lookup_symbol (GScanner *scanner, |
792 | const gchar *symbol) |
793 | { |
794 | GScannerKey *key; |
795 | guint scope_id; |
796 | |
797 | g_return_val_if_fail (scanner != NULL, NULL); |
798 | |
799 | if (!symbol) |
800 | return NULL; |
801 | |
802 | scope_id = scanner->scope_id; |
803 | key = g_scanner_lookup_internal (scanner, scope_id, symbol); |
804 | if (!key && scope_id && scanner->config->scope_0_fallback) |
805 | key = g_scanner_lookup_internal (scanner, scope_id: 0, symbol); |
806 | |
807 | if (key) |
808 | return key->value; |
809 | else |
810 | return NULL; |
811 | } |
812 | |
813 | /** |
814 | * g_scanner_scope_lookup_symbol: |
815 | * @scanner: a #GScanner |
816 | * @scope_id: the scope id |
817 | * @symbol: the symbol to look up |
818 | * |
819 | * Looks up a symbol in a scope and return its value. If the |
820 | * symbol is not bound in the scope, %NULL is returned. |
821 | * |
822 | * Returns: the value of @symbol in the given scope, or %NULL |
823 | * if @symbol is not bound in the given scope. |
824 | * |
825 | */ |
826 | gpointer |
827 | g_scanner_scope_lookup_symbol (GScanner *scanner, |
828 | guint scope_id, |
829 | const gchar *symbol) |
830 | { |
831 | GScannerKey *key; |
832 | |
833 | g_return_val_if_fail (scanner != NULL, NULL); |
834 | |
835 | if (!symbol) |
836 | return NULL; |
837 | |
838 | key = g_scanner_lookup_internal (scanner, scope_id, symbol); |
839 | |
840 | if (key) |
841 | return key->value; |
842 | else |
843 | return NULL; |
844 | } |
845 | |
846 | /** |
847 | * g_scanner_set_scope: |
848 | * @scanner: a #GScanner |
849 | * @scope_id: the new scope id |
850 | * |
851 | * Sets the current scope. |
852 | * |
853 | * Returns: the old scope id |
854 | */ |
855 | guint |
856 | g_scanner_set_scope (GScanner *scanner, |
857 | guint scope_id) |
858 | { |
859 | guint old_scope_id; |
860 | |
861 | g_return_val_if_fail (scanner != NULL, 0); |
862 | |
863 | old_scope_id = scanner->scope_id; |
864 | scanner->scope_id = scope_id; |
865 | |
866 | return old_scope_id; |
867 | } |
868 | |
869 | static void |
870 | g_scanner_foreach_internal (gpointer _key, |
871 | gpointer _value, |
872 | gpointer _user_data) |
873 | { |
874 | GScannerKey *key; |
875 | gpointer *d; |
876 | GHFunc func; |
877 | gpointer user_data; |
878 | guint *scope_id; |
879 | |
880 | d = _user_data; |
881 | func = (GHFunc) d[0]; |
882 | user_data = d[1]; |
883 | scope_id = d[2]; |
884 | key = _value; |
885 | |
886 | if (key->scope_id == *scope_id) |
887 | func (key->symbol, key->value, user_data); |
888 | } |
889 | |
890 | /** |
891 | * g_scanner_foreach_symbol: |
892 | * @scanner: a #GScanner |
893 | * @func: the function to call with each symbol |
894 | * @data: data to pass to the function |
895 | * |
896 | * Calls a function for each symbol in the default scope. |
897 | * |
898 | * Deprecated: 2.2: Use g_scanner_scope_foreach_symbol() instead. |
899 | */ |
900 | |
901 | /** |
902 | * g_scanner_scope_foreach_symbol: |
903 | * @scanner: a #GScanner |
904 | * @scope_id: the scope id |
905 | * @func: the function to call for each symbol/value pair |
906 | * @user_data: user data to pass to the function |
907 | * |
908 | * Calls the given function for each of the symbol/value pairs |
909 | * in the given scope of the #GScanner. The function is passed |
910 | * the symbol and value of each pair, and the given @user_data |
911 | * parameter. |
912 | */ |
913 | void |
914 | g_scanner_scope_foreach_symbol (GScanner *scanner, |
915 | guint scope_id, |
916 | GHFunc func, |
917 | gpointer user_data) |
918 | { |
919 | gpointer d[3]; |
920 | |
921 | g_return_if_fail (scanner != NULL); |
922 | |
923 | d[0] = (gpointer) func; |
924 | d[1] = user_data; |
925 | d[2] = &scope_id; |
926 | |
927 | g_hash_table_foreach (hash_table: scanner->symbol_table, func: g_scanner_foreach_internal, user_data: d); |
928 | } |
929 | |
930 | /** |
931 | * g_scanner_peek_next_token: |
932 | * @scanner: a #GScanner |
933 | * |
934 | * Parses the next token, without removing it from the input stream. |
935 | * The token data is placed in the @next_token, @next_value, @next_line, |
936 | * and @next_position fields of the #GScanner structure. |
937 | * |
938 | * Note that, while the token is not removed from the input stream |
939 | * (i.e. the next call to g_scanner_get_next_token() will return the |
940 | * same token), it will not be reevaluated. This can lead to surprising |
941 | * results when changing scope or the scanner configuration after peeking |
942 | * the next token. Getting the next token after switching the scope or |
943 | * configuration will return whatever was peeked before, regardless of |
944 | * any symbols that may have been added or removed in the new scope. |
945 | * |
946 | * Returns: the type of the token |
947 | */ |
948 | GTokenType |
949 | g_scanner_peek_next_token (GScanner *scanner) |
950 | { |
951 | g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); |
952 | |
953 | if (scanner->next_token == G_TOKEN_NONE) |
954 | { |
955 | scanner->next_line = scanner->line; |
956 | scanner->next_position = scanner->position; |
957 | g_scanner_get_token_i (scanner, |
958 | token_p: &scanner->next_token, |
959 | value_p: &scanner->next_value, |
960 | line_p: &scanner->next_line, |
961 | position_p: &scanner->next_position); |
962 | } |
963 | |
964 | return scanner->next_token; |
965 | } |
966 | |
967 | /** |
968 | * g_scanner_get_next_token: |
969 | * @scanner: a #GScanner |
970 | * |
971 | * Parses the next token just like g_scanner_peek_next_token() |
972 | * and also removes it from the input stream. The token data is |
973 | * placed in the @token, @value, @line, and @position fields of |
974 | * the #GScanner structure. |
975 | * |
976 | * Returns: the type of the token |
977 | */ |
978 | GTokenType |
979 | g_scanner_get_next_token (GScanner *scanner) |
980 | { |
981 | g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); |
982 | |
983 | if (scanner->next_token != G_TOKEN_NONE) |
984 | { |
985 | g_scanner_free_value (token_p: &scanner->token, value_p: &scanner->value); |
986 | |
987 | scanner->token = scanner->next_token; |
988 | scanner->value = scanner->next_value; |
989 | scanner->line = scanner->next_line; |
990 | scanner->position = scanner->next_position; |
991 | scanner->next_token = G_TOKEN_NONE; |
992 | } |
993 | else |
994 | g_scanner_get_token_i (scanner, |
995 | token_p: &scanner->token, |
996 | value_p: &scanner->value, |
997 | line_p: &scanner->line, |
998 | position_p: &scanner->position); |
999 | |
1000 | return scanner->token; |
1001 | } |
1002 | |
1003 | /** |
1004 | * g_scanner_cur_token: |
1005 | * @scanner: a #GScanner |
1006 | * |
1007 | * Gets the current token type. This is simply the @token |
1008 | * field in the #GScanner structure. |
1009 | * |
1010 | * Returns: the current token type |
1011 | */ |
1012 | GTokenType |
1013 | g_scanner_cur_token (GScanner *scanner) |
1014 | { |
1015 | g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); |
1016 | |
1017 | return scanner->token; |
1018 | } |
1019 | |
1020 | /** |
1021 | * g_scanner_cur_value: |
1022 | * @scanner: a #GScanner |
1023 | * |
1024 | * Gets the current token value. This is simply the @value |
1025 | * field in the #GScanner structure. |
1026 | * |
1027 | * Returns: the current token value |
1028 | */ |
1029 | GTokenValue |
1030 | g_scanner_cur_value (GScanner *scanner) |
1031 | { |
1032 | GTokenValue v; |
1033 | |
1034 | v.v_int64 = 0; |
1035 | |
1036 | g_return_val_if_fail (scanner != NULL, v); |
1037 | |
1038 | /* MSC isn't capable of handling return scanner->value; ? */ |
1039 | |
1040 | v = scanner->value; |
1041 | |
1042 | return v; |
1043 | } |
1044 | |
1045 | /** |
1046 | * g_scanner_cur_line: |
1047 | * @scanner: a #GScanner |
1048 | * |
1049 | * Returns the current line in the input stream (counting |
1050 | * from 1). This is the line of the last token parsed via |
1051 | * g_scanner_get_next_token(). |
1052 | * |
1053 | * Returns: the current line |
1054 | */ |
1055 | guint |
1056 | g_scanner_cur_line (GScanner *scanner) |
1057 | { |
1058 | g_return_val_if_fail (scanner != NULL, 0); |
1059 | |
1060 | return scanner->line; |
1061 | } |
1062 | |
1063 | /** |
1064 | * g_scanner_cur_position: |
1065 | * @scanner: a #GScanner |
1066 | * |
1067 | * Returns the current position in the current line (counting |
1068 | * from 0). This is the position of the last token parsed via |
1069 | * g_scanner_get_next_token(). |
1070 | * |
1071 | * Returns: the current position on the line |
1072 | */ |
1073 | guint |
1074 | g_scanner_cur_position (GScanner *scanner) |
1075 | { |
1076 | g_return_val_if_fail (scanner != NULL, 0); |
1077 | |
1078 | return scanner->position; |
1079 | } |
1080 | |
1081 | /** |
1082 | * g_scanner_eof: |
1083 | * @scanner: a #GScanner |
1084 | * |
1085 | * Returns %TRUE if the scanner has reached the end of |
1086 | * the file or text buffer. |
1087 | * |
1088 | * Returns: %TRUE if the scanner has reached the end of |
1089 | * the file or text buffer |
1090 | */ |
1091 | gboolean |
1092 | g_scanner_eof (GScanner *scanner) |
1093 | { |
1094 | g_return_val_if_fail (scanner != NULL, TRUE); |
1095 | |
1096 | return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR; |
1097 | } |
1098 | |
1099 | /** |
1100 | * g_scanner_input_file: |
1101 | * @scanner: a #GScanner |
1102 | * @input_fd: a file descriptor |
1103 | * |
1104 | * Prepares to scan a file. |
1105 | */ |
1106 | void |
1107 | g_scanner_input_file (GScanner *scanner, |
1108 | gint input_fd) |
1109 | { |
1110 | g_return_if_fail (scanner != NULL); |
1111 | g_return_if_fail (input_fd >= 0); |
1112 | |
1113 | if (scanner->input_fd >= 0) |
1114 | g_scanner_sync_file_offset (scanner); |
1115 | |
1116 | scanner->token = G_TOKEN_NONE; |
1117 | scanner->value.v_int64 = 0; |
1118 | scanner->line = 1; |
1119 | scanner->position = 0; |
1120 | scanner->next_token = G_TOKEN_NONE; |
1121 | |
1122 | scanner->input_fd = input_fd; |
1123 | scanner->text = NULL; |
1124 | scanner->text_end = NULL; |
1125 | |
1126 | if (!scanner->buffer) |
1127 | scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1); |
1128 | } |
1129 | |
1130 | /** |
1131 | * g_scanner_input_text: |
1132 | * @scanner: a #GScanner |
1133 | * @text: the text buffer to scan |
1134 | * @text_len: the length of the text buffer |
1135 | * |
1136 | * Prepares to scan a text buffer. |
1137 | */ |
1138 | void |
1139 | g_scanner_input_text (GScanner *scanner, |
1140 | const gchar *text, |
1141 | guint text_len) |
1142 | { |
1143 | g_return_if_fail (scanner != NULL); |
1144 | if (text_len) |
1145 | g_return_if_fail (text != NULL); |
1146 | else |
1147 | text = NULL; |
1148 | |
1149 | if (scanner->input_fd >= 0) |
1150 | g_scanner_sync_file_offset (scanner); |
1151 | |
1152 | scanner->token = G_TOKEN_NONE; |
1153 | scanner->value.v_int64 = 0; |
1154 | scanner->line = 1; |
1155 | scanner->position = 0; |
1156 | scanner->next_token = G_TOKEN_NONE; |
1157 | |
1158 | scanner->input_fd = -1; |
1159 | scanner->text = text; |
1160 | scanner->text_end = text + text_len; |
1161 | |
1162 | if (scanner->buffer) |
1163 | { |
1164 | g_free (mem: scanner->buffer); |
1165 | scanner->buffer = NULL; |
1166 | } |
1167 | } |
1168 | |
1169 | static guchar |
1170 | g_scanner_peek_next_char (GScanner *scanner) |
1171 | { |
1172 | if (scanner->text < scanner->text_end) |
1173 | { |
1174 | return *scanner->text; |
1175 | } |
1176 | else if (scanner->input_fd >= 0) |
1177 | { |
1178 | gint count; |
1179 | gchar *buffer; |
1180 | |
1181 | buffer = scanner->buffer; |
1182 | do |
1183 | { |
1184 | count = read (fd: scanner->input_fd, buf: buffer, READ_BUFFER_SIZE); |
1185 | } |
1186 | while (count == -1 && (errno == EINTR || errno == EAGAIN)); |
1187 | |
1188 | if (count < 1) |
1189 | { |
1190 | scanner->input_fd = -1; |
1191 | |
1192 | return 0; |
1193 | } |
1194 | else |
1195 | { |
1196 | scanner->text = buffer; |
1197 | scanner->text_end = buffer + count; |
1198 | |
1199 | return *buffer; |
1200 | } |
1201 | } |
1202 | else |
1203 | return 0; |
1204 | } |
1205 | |
1206 | /** |
1207 | * g_scanner_sync_file_offset: |
1208 | * @scanner: a #GScanner |
1209 | * |
1210 | * Rewinds the filedescriptor to the current buffer position |
1211 | * and blows the file read ahead buffer. This is useful for |
1212 | * third party uses of the scanners filedescriptor, which hooks |
1213 | * onto the current scanning position. |
1214 | */ |
1215 | void |
1216 | g_scanner_sync_file_offset (GScanner *scanner) |
1217 | { |
1218 | g_return_if_fail (scanner != NULL); |
1219 | |
1220 | /* for file input, rewind the filedescriptor to the current |
1221 | * buffer position and blow the file read ahead buffer. useful |
1222 | * for third party uses of our file descriptor, which hooks |
1223 | * onto the current scanning position. |
1224 | */ |
1225 | |
1226 | if (scanner->input_fd >= 0 && scanner->text_end > scanner->text) |
1227 | { |
1228 | gint buffered; |
1229 | |
1230 | buffered = scanner->text_end - scanner->text; |
1231 | if (lseek (fd: scanner->input_fd, offset: - buffered, SEEK_CUR) >= 0) |
1232 | { |
1233 | /* we succeeded, blow our buffer's contents now */ |
1234 | scanner->text = NULL; |
1235 | scanner->text_end = NULL; |
1236 | } |
1237 | else |
1238 | errno = 0; |
1239 | } |
1240 | } |
1241 | |
1242 | static guchar |
1243 | g_scanner_get_char (GScanner *scanner, |
1244 | guint *line_p, |
1245 | guint *position_p) |
1246 | { |
1247 | guchar fchar; |
1248 | |
1249 | if (scanner->text < scanner->text_end) |
1250 | fchar = *(scanner->text++); |
1251 | else if (scanner->input_fd >= 0) |
1252 | { |
1253 | gint count; |
1254 | gchar *buffer; |
1255 | |
1256 | buffer = scanner->buffer; |
1257 | do |
1258 | { |
1259 | count = read (fd: scanner->input_fd, buf: buffer, READ_BUFFER_SIZE); |
1260 | } |
1261 | while (count == -1 && (errno == EINTR || errno == EAGAIN)); |
1262 | |
1263 | if (count < 1) |
1264 | { |
1265 | scanner->input_fd = -1; |
1266 | fchar = 0; |
1267 | } |
1268 | else |
1269 | { |
1270 | scanner->text = buffer + 1; |
1271 | scanner->text_end = buffer + count; |
1272 | fchar = *buffer; |
1273 | if (!fchar) |
1274 | { |
1275 | g_scanner_sync_file_offset (scanner); |
1276 | scanner->text_end = scanner->text; |
1277 | scanner->input_fd = -1; |
1278 | } |
1279 | } |
1280 | } |
1281 | else |
1282 | fchar = 0; |
1283 | |
1284 | if (fchar == '\n') |
1285 | { |
1286 | (*position_p) = 0; |
1287 | (*line_p)++; |
1288 | } |
1289 | else if (fchar) |
1290 | { |
1291 | (*position_p)++; |
1292 | } |
1293 | |
1294 | return fchar; |
1295 | } |
1296 | |
1297 | /** |
1298 | * g_scanner_unexp_token: |
1299 | * @scanner: a #GScanner |
1300 | * @expected_token: the expected token |
1301 | * @identifier_spec: a string describing how the scanner's user |
1302 | * refers to identifiers (%NULL defaults to "identifier"). |
1303 | * This is used if @expected_token is %G_TOKEN_IDENTIFIER or |
1304 | * %G_TOKEN_IDENTIFIER_NULL. |
1305 | * @symbol_spec: a string describing how the scanner's user refers |
1306 | * to symbols (%NULL defaults to "symbol"). This is used if |
1307 | * @expected_token is %G_TOKEN_SYMBOL or any token value greater |
1308 | * than %G_TOKEN_LAST. |
1309 | * @symbol_name: the name of the symbol, if the scanner's current |
1310 | * token is a symbol. |
1311 | * @message: a message string to output at the end of the |
1312 | * warning/error, or %NULL. |
1313 | * @is_error: if %TRUE it is output as an error. If %FALSE it is |
1314 | * output as a warning. |
1315 | * |
1316 | * Outputs a message through the scanner's msg_handler, |
1317 | * resulting from an unexpected token in the input stream. |
1318 | * Note that you should not call g_scanner_peek_next_token() |
1319 | * followed by g_scanner_unexp_token() without an intermediate |
1320 | * call to g_scanner_get_next_token(), as g_scanner_unexp_token() |
1321 | * evaluates the scanner's current token (not the peeked token) |
1322 | * to construct part of the message. |
1323 | */ |
1324 | void |
1325 | g_scanner_unexp_token (GScanner *scanner, |
1326 | GTokenType expected_token, |
1327 | const gchar *identifier_spec, |
1328 | const gchar *symbol_spec, |
1329 | const gchar *symbol_name, |
1330 | const gchar *message, |
1331 | gint is_error) |
1332 | { |
1333 | gchar *token_string; |
1334 | guint token_string_len; |
1335 | gchar *expected_string; |
1336 | guint expected_string_len; |
1337 | gchar *message_prefix; |
1338 | gboolean print_unexp; |
1339 | void (*msg_handler) (GScanner*, const gchar*, ...); |
1340 | |
1341 | g_return_if_fail (scanner != NULL); |
1342 | |
1343 | if (is_error) |
1344 | msg_handler = g_scanner_error; |
1345 | else |
1346 | msg_handler = g_scanner_warn; |
1347 | |
1348 | if (!identifier_spec) |
1349 | identifier_spec = "identifier" ; |
1350 | if (!symbol_spec) |
1351 | symbol_spec = "symbol" ; |
1352 | |
1353 | token_string_len = 56; |
1354 | token_string = g_new (gchar, token_string_len + 1); |
1355 | expected_string_len = 64; |
1356 | expected_string = g_new (gchar, expected_string_len + 1); |
1357 | print_unexp = TRUE; |
1358 | |
1359 | switch (scanner->token) |
1360 | { |
1361 | case G_TOKEN_EOF: |
1362 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "end of file" ); |
1363 | break; |
1364 | |
1365 | default: |
1366 | if (scanner->token >= 1 && scanner->token <= 255) |
1367 | { |
1368 | if ((scanner->token >= ' ' && scanner->token <= '~') || |
1369 | strchr (s: scanner->config->cset_identifier_first, c: scanner->token) || |
1370 | strchr (s: scanner->config->cset_identifier_nth, c: scanner->token)) |
1371 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "character '%c'" , scanner->token); |
1372 | else |
1373 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "character '\\%o'" , scanner->token); |
1374 | break; |
1375 | } |
1376 | else if (!scanner->config->symbol_2_token) |
1377 | { |
1378 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "(unknown) token <%d>" , scanner->token); |
1379 | break; |
1380 | } |
1381 | G_GNUC_FALLTHROUGH; |
1382 | case G_TOKEN_SYMBOL: |
1383 | if (expected_token == G_TOKEN_SYMBOL || |
1384 | (scanner->config->symbol_2_token && |
1385 | expected_token > G_TOKEN_LAST)) |
1386 | print_unexp = FALSE; |
1387 | if (symbol_name) |
1388 | _g_snprintf (s: token_string, |
1389 | maxlen: token_string_len, |
1390 | format: "%s%s '%s'" , |
1391 | print_unexp ? "" : "invalid " , |
1392 | symbol_spec, |
1393 | symbol_name); |
1394 | else |
1395 | _g_snprintf (s: token_string, |
1396 | maxlen: token_string_len, |
1397 | format: "%s%s" , |
1398 | print_unexp ? "" : "invalid " , |
1399 | symbol_spec); |
1400 | break; |
1401 | |
1402 | case G_TOKEN_ERROR: |
1403 | print_unexp = FALSE; |
1404 | expected_token = G_TOKEN_NONE; |
1405 | switch (scanner->value.v_error) |
1406 | { |
1407 | case G_ERR_UNEXP_EOF: |
1408 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: unexpected end of file" ); |
1409 | break; |
1410 | |
1411 | case G_ERR_UNEXP_EOF_IN_STRING: |
1412 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: unterminated string constant" ); |
1413 | break; |
1414 | |
1415 | case G_ERR_UNEXP_EOF_IN_COMMENT: |
1416 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: unterminated comment" ); |
1417 | break; |
1418 | |
1419 | case G_ERR_NON_DIGIT_IN_CONST: |
1420 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: non digit in constant" ); |
1421 | break; |
1422 | |
1423 | case G_ERR_FLOAT_RADIX: |
1424 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: invalid radix for floating constant" ); |
1425 | break; |
1426 | |
1427 | case G_ERR_FLOAT_MALFORMED: |
1428 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: malformed floating constant" ); |
1429 | break; |
1430 | |
1431 | case G_ERR_DIGIT_RADIX: |
1432 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: digit is beyond radix" ); |
1433 | break; |
1434 | |
1435 | case G_ERR_UNKNOWN: |
1436 | default: |
1437 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "scanner: unknown error" ); |
1438 | break; |
1439 | } |
1440 | break; |
1441 | |
1442 | case G_TOKEN_CHAR: |
1443 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "character '%c'" , scanner->value.v_char); |
1444 | break; |
1445 | |
1446 | case G_TOKEN_IDENTIFIER: |
1447 | case G_TOKEN_IDENTIFIER_NULL: |
1448 | if (expected_token == G_TOKEN_IDENTIFIER || |
1449 | expected_token == G_TOKEN_IDENTIFIER_NULL) |
1450 | print_unexp = FALSE; |
1451 | _g_snprintf (s: token_string, |
1452 | maxlen: token_string_len, |
1453 | format: "%s%s '%s'" , |
1454 | print_unexp ? "" : "invalid " , |
1455 | identifier_spec, |
1456 | scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null" ); |
1457 | break; |
1458 | |
1459 | case G_TOKEN_BINARY: |
1460 | case G_TOKEN_OCTAL: |
1461 | case G_TOKEN_INT: |
1462 | case G_TOKEN_HEX: |
1463 | if (scanner->config->store_int64) |
1464 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "number '%" G_GUINT64_FORMAT "'" , scanner->value.v_int64); |
1465 | else |
1466 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "number '%lu'" , scanner->value.v_int); |
1467 | break; |
1468 | |
1469 | case G_TOKEN_FLOAT: |
1470 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "number '%.3f'" , scanner->value.v_float); |
1471 | break; |
1472 | |
1473 | case G_TOKEN_STRING: |
1474 | if (expected_token == G_TOKEN_STRING) |
1475 | print_unexp = FALSE; |
1476 | _g_snprintf (s: token_string, |
1477 | maxlen: token_string_len, |
1478 | format: "%s%sstring constant \"%s\"" , |
1479 | print_unexp ? "" : "invalid " , |
1480 | scanner->value.v_string[0] == 0 ? "empty " : "" , |
1481 | scanner->value.v_string); |
1482 | token_string[token_string_len - 2] = '"'; |
1483 | token_string[token_string_len - 1] = 0; |
1484 | break; |
1485 | |
1486 | case G_TOKEN_COMMENT_SINGLE: |
1487 | case G_TOKEN_COMMENT_MULTI: |
1488 | _g_snprintf (s: token_string, maxlen: token_string_len, format: "comment" ); |
1489 | break; |
1490 | |
1491 | case G_TOKEN_NONE: |
1492 | /* somehow the user's parsing code is screwed, there isn't much |
1493 | * we can do about it. |
1494 | * Note, a common case to trigger this is |
1495 | * g_scanner_peek_next_token(); g_scanner_unexp_token(); |
1496 | * without an intermediate g_scanner_get_next_token(). |
1497 | */ |
1498 | g_assert_not_reached (); |
1499 | break; |
1500 | } |
1501 | |
1502 | |
1503 | switch (expected_token) |
1504 | { |
1505 | gboolean need_valid; |
1506 | gchar *tstring; |
1507 | case G_TOKEN_EOF: |
1508 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "end of file" ); |
1509 | break; |
1510 | default: |
1511 | if (expected_token >= 1 && expected_token <= 255) |
1512 | { |
1513 | if ((expected_token >= ' ' && expected_token <= '~') || |
1514 | strchr (s: scanner->config->cset_identifier_first, c: expected_token) || |
1515 | strchr (s: scanner->config->cset_identifier_nth, c: expected_token)) |
1516 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "character '%c'" , expected_token); |
1517 | else |
1518 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "character '\\%o'" , expected_token); |
1519 | break; |
1520 | } |
1521 | else if (!scanner->config->symbol_2_token) |
1522 | { |
1523 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "(unknown) token <%d>" , expected_token); |
1524 | break; |
1525 | } |
1526 | G_GNUC_FALLTHROUGH; |
1527 | case G_TOKEN_SYMBOL: |
1528 | need_valid = (scanner->token == G_TOKEN_SYMBOL || |
1529 | (scanner->config->symbol_2_token && |
1530 | scanner->token > G_TOKEN_LAST)); |
1531 | _g_snprintf (s: expected_string, |
1532 | maxlen: expected_string_len, |
1533 | format: "%s%s" , |
1534 | need_valid ? "valid " : "" , |
1535 | symbol_spec); |
1536 | /* FIXME: should we attempt to look up the symbol_name for symbol_2_token? */ |
1537 | break; |
1538 | case G_TOKEN_CHAR: |
1539 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%scharacter" , |
1540 | scanner->token == G_TOKEN_CHAR ? "valid " : "" ); |
1541 | break; |
1542 | case G_TOKEN_BINARY: |
1543 | tstring = "binary" ; |
1544 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%snumber (%s)" , |
1545 | scanner->token == expected_token ? "valid " : "" , tstring); |
1546 | break; |
1547 | case G_TOKEN_OCTAL: |
1548 | tstring = "octal" ; |
1549 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%snumber (%s)" , |
1550 | scanner->token == expected_token ? "valid " : "" , tstring); |
1551 | break; |
1552 | case G_TOKEN_INT: |
1553 | tstring = "integer" ; |
1554 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%snumber (%s)" , |
1555 | scanner->token == expected_token ? "valid " : "" , tstring); |
1556 | break; |
1557 | case G_TOKEN_HEX: |
1558 | tstring = "hexadecimal" ; |
1559 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%snumber (%s)" , |
1560 | scanner->token == expected_token ? "valid " : "" , tstring); |
1561 | break; |
1562 | case G_TOKEN_FLOAT: |
1563 | tstring = "float" ; |
1564 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%snumber (%s)" , |
1565 | scanner->token == expected_token ? "valid " : "" , tstring); |
1566 | break; |
1567 | case G_TOKEN_STRING: |
1568 | _g_snprintf (s: expected_string, |
1569 | maxlen: expected_string_len, |
1570 | format: "%sstring constant" , |
1571 | scanner->token == G_TOKEN_STRING ? "valid " : "" ); |
1572 | break; |
1573 | case G_TOKEN_IDENTIFIER: |
1574 | case G_TOKEN_IDENTIFIER_NULL: |
1575 | need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL || |
1576 | scanner->token == G_TOKEN_IDENTIFIER); |
1577 | _g_snprintf (s: expected_string, |
1578 | maxlen: expected_string_len, |
1579 | format: "%s%s" , |
1580 | need_valid ? "valid " : "" , |
1581 | identifier_spec); |
1582 | break; |
1583 | case G_TOKEN_COMMENT_SINGLE: |
1584 | tstring = "single-line" ; |
1585 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%scomment (%s)" , |
1586 | scanner->token == expected_token ? "valid " : "" , tstring); |
1587 | break; |
1588 | case G_TOKEN_COMMENT_MULTI: |
1589 | tstring = "multi-line" ; |
1590 | _g_snprintf (s: expected_string, maxlen: expected_string_len, format: "%scomment (%s)" , |
1591 | scanner->token == expected_token ? "valid " : "" , tstring); |
1592 | break; |
1593 | case G_TOKEN_NONE: |
1594 | case G_TOKEN_ERROR: |
1595 | /* this is handled upon printout */ |
1596 | break; |
1597 | } |
1598 | |
1599 | if (message && message[0] != 0) |
1600 | message_prefix = " - " ; |
1601 | else |
1602 | { |
1603 | message_prefix = "" ; |
1604 | message = "" ; |
1605 | } |
1606 | if (expected_token == G_TOKEN_ERROR) |
1607 | { |
1608 | msg_handler (scanner, |
1609 | "failure around %s%s%s" , |
1610 | token_string, |
1611 | message_prefix, |
1612 | message); |
1613 | } |
1614 | else if (expected_token == G_TOKEN_NONE) |
1615 | { |
1616 | if (print_unexp) |
1617 | msg_handler (scanner, |
1618 | "unexpected %s%s%s" , |
1619 | token_string, |
1620 | message_prefix, |
1621 | message); |
1622 | else |
1623 | msg_handler (scanner, |
1624 | "%s%s%s" , |
1625 | token_string, |
1626 | message_prefix, |
1627 | message); |
1628 | } |
1629 | else |
1630 | { |
1631 | if (print_unexp) |
1632 | msg_handler (scanner, |
1633 | "unexpected %s, expected %s%s%s" , |
1634 | token_string, |
1635 | expected_string, |
1636 | message_prefix, |
1637 | message); |
1638 | else |
1639 | msg_handler (scanner, |
1640 | "%s, expected %s%s%s" , |
1641 | token_string, |
1642 | expected_string, |
1643 | message_prefix, |
1644 | message); |
1645 | } |
1646 | |
1647 | g_free (mem: token_string); |
1648 | g_free (mem: expected_string); |
1649 | } |
1650 | |
1651 | static void |
1652 | g_scanner_get_token_i (GScanner *scanner, |
1653 | GTokenType *token_p, |
1654 | GTokenValue *value_p, |
1655 | guint *line_p, |
1656 | guint *position_p) |
1657 | { |
1658 | do |
1659 | { |
1660 | g_scanner_free_value (token_p, value_p); |
1661 | g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p); |
1662 | } |
1663 | while (((*token_p > 0 && *token_p < 256) && |
1664 | strchr (s: scanner->config->cset_skip_characters, c: *token_p)) || |
1665 | (*token_p == G_TOKEN_CHAR && |
1666 | strchr (s: scanner->config->cset_skip_characters, c: value_p->v_char)) || |
1667 | (*token_p == G_TOKEN_COMMENT_MULTI && |
1668 | scanner->config->skip_comment_multi) || |
1669 | (*token_p == G_TOKEN_COMMENT_SINGLE && |
1670 | scanner->config->skip_comment_single)); |
1671 | |
1672 | switch (*token_p) |
1673 | { |
1674 | case G_TOKEN_IDENTIFIER: |
1675 | if (scanner->config->identifier_2_string) |
1676 | *token_p = G_TOKEN_STRING; |
1677 | break; |
1678 | |
1679 | case G_TOKEN_SYMBOL: |
1680 | if (scanner->config->symbol_2_token) |
1681 | *token_p = (GTokenType) value_p->v_symbol; |
1682 | break; |
1683 | |
1684 | case G_TOKEN_BINARY: |
1685 | case G_TOKEN_OCTAL: |
1686 | case G_TOKEN_HEX: |
1687 | if (scanner->config->numbers_2_int) |
1688 | *token_p = G_TOKEN_INT; |
1689 | break; |
1690 | |
1691 | default: |
1692 | break; |
1693 | } |
1694 | |
1695 | if (*token_p == G_TOKEN_INT && |
1696 | scanner->config->int_2_float) |
1697 | { |
1698 | *token_p = G_TOKEN_FLOAT; |
1699 | |
1700 | /* Have to assign through a temporary variable to avoid undefined behaviour |
1701 | * by copying between potentially-overlapping union members. */ |
1702 | if (scanner->config->store_int64) |
1703 | { |
1704 | gint64 temp = value_p->v_int64; |
1705 | value_p->v_float = temp; |
1706 | } |
1707 | else |
1708 | { |
1709 | gint temp = value_p->v_int; |
1710 | value_p->v_float = temp; |
1711 | } |
1712 | } |
1713 | |
1714 | errno = 0; |
1715 | } |
1716 | |
1717 | static void |
1718 | g_scanner_get_token_ll (GScanner *scanner, |
1719 | GTokenType *token_p, |
1720 | GTokenValue *value_p, |
1721 | guint *line_p, |
1722 | guint *position_p) |
1723 | { |
1724 | GScannerConfig *config; |
1725 | GTokenType token; |
1726 | gboolean ; |
1727 | gboolean ; |
1728 | gboolean in_string_sq; |
1729 | gboolean in_string_dq; |
1730 | GString *gstring; |
1731 | GTokenValue value; |
1732 | guchar ch; |
1733 | |
1734 | config = scanner->config; |
1735 | (*value_p).v_int64 = 0; |
1736 | |
1737 | if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) || |
1738 | scanner->token == G_TOKEN_EOF) |
1739 | { |
1740 | *token_p = G_TOKEN_EOF; |
1741 | return; |
1742 | } |
1743 | |
1744 | in_comment_multi = FALSE; |
1745 | in_comment_single = FALSE; |
1746 | in_string_sq = FALSE; |
1747 | in_string_dq = FALSE; |
1748 | gstring = NULL; |
1749 | |
1750 | do /* while (ch != 0) */ |
1751 | { |
1752 | gboolean dotted_float = FALSE; |
1753 | |
1754 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1755 | |
1756 | value.v_int64 = 0; |
1757 | token = G_TOKEN_NONE; |
1758 | |
1759 | /* this is *evil*, but needed ;( |
1760 | * we first check for identifier first character, because it |
1761 | * might interfere with other key chars like slashes or numbers |
1762 | */ |
1763 | if (config->scan_identifier && |
1764 | ch && strchr (s: config->cset_identifier_first, c: ch)) |
1765 | goto identifier_precedence; |
1766 | |
1767 | switch (ch) |
1768 | { |
1769 | case 0: |
1770 | token = G_TOKEN_EOF; |
1771 | (*position_p)++; |
1772 | /* ch = 0; */ |
1773 | break; |
1774 | |
1775 | case '/': |
1776 | if (!config->scan_comment_multi || |
1777 | g_scanner_peek_next_char (scanner) != '*') |
1778 | goto default_case; |
1779 | g_scanner_get_char (scanner, line_p, position_p); |
1780 | token = G_TOKEN_COMMENT_MULTI; |
1781 | in_comment_multi = TRUE; |
1782 | gstring = g_string_new (NULL); |
1783 | while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) |
1784 | { |
1785 | if (ch == '*' && g_scanner_peek_next_char (scanner) == '/') |
1786 | { |
1787 | g_scanner_get_char (scanner, line_p, position_p); |
1788 | in_comment_multi = FALSE; |
1789 | break; |
1790 | } |
1791 | else |
1792 | gstring = g_string_append_c (gstring, ch); |
1793 | } |
1794 | ch = 0; |
1795 | break; |
1796 | |
1797 | case '\'': |
1798 | if (!config->scan_string_sq) |
1799 | goto default_case; |
1800 | token = G_TOKEN_STRING; |
1801 | in_string_sq = TRUE; |
1802 | gstring = g_string_new (NULL); |
1803 | while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) |
1804 | { |
1805 | if (ch == '\'') |
1806 | { |
1807 | in_string_sq = FALSE; |
1808 | break; |
1809 | } |
1810 | else |
1811 | gstring = g_string_append_c (gstring, ch); |
1812 | } |
1813 | ch = 0; |
1814 | break; |
1815 | |
1816 | case '"': |
1817 | if (!config->scan_string_dq) |
1818 | goto default_case; |
1819 | token = G_TOKEN_STRING; |
1820 | in_string_dq = TRUE; |
1821 | gstring = g_string_new (NULL); |
1822 | while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) |
1823 | { |
1824 | if (ch == '"') |
1825 | { |
1826 | in_string_dq = FALSE; |
1827 | break; |
1828 | } |
1829 | else |
1830 | { |
1831 | if (ch == '\\') |
1832 | { |
1833 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1834 | switch (ch) |
1835 | { |
1836 | guint i; |
1837 | guint fchar; |
1838 | |
1839 | case 0: |
1840 | break; |
1841 | |
1842 | case '\\': |
1843 | gstring = g_string_append_c (gstring, '\\'); |
1844 | break; |
1845 | |
1846 | case 'n': |
1847 | gstring = g_string_append_c (gstring, '\n'); |
1848 | break; |
1849 | |
1850 | case 't': |
1851 | gstring = g_string_append_c (gstring, '\t'); |
1852 | break; |
1853 | |
1854 | case 'r': |
1855 | gstring = g_string_append_c (gstring, '\r'); |
1856 | break; |
1857 | |
1858 | case 'b': |
1859 | gstring = g_string_append_c (gstring, '\b'); |
1860 | break; |
1861 | |
1862 | case 'f': |
1863 | gstring = g_string_append_c (gstring, '\f'); |
1864 | break; |
1865 | |
1866 | case '0': |
1867 | case '1': |
1868 | case '2': |
1869 | case '3': |
1870 | case '4': |
1871 | case '5': |
1872 | case '6': |
1873 | case '7': |
1874 | i = ch - '0'; |
1875 | fchar = g_scanner_peek_next_char (scanner); |
1876 | if (fchar >= '0' && fchar <= '7') |
1877 | { |
1878 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1879 | i = i * 8 + ch - '0'; |
1880 | fchar = g_scanner_peek_next_char (scanner); |
1881 | if (fchar >= '0' && fchar <= '7') |
1882 | { |
1883 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1884 | i = i * 8 + ch - '0'; |
1885 | } |
1886 | } |
1887 | gstring = g_string_append_c (gstring, i); |
1888 | break; |
1889 | |
1890 | default: |
1891 | gstring = g_string_append_c (gstring, ch); |
1892 | break; |
1893 | } |
1894 | } |
1895 | else |
1896 | gstring = g_string_append_c (gstring, ch); |
1897 | } |
1898 | } |
1899 | ch = 0; |
1900 | break; |
1901 | |
1902 | case '.': |
1903 | if (!config->scan_float) |
1904 | goto default_case; |
1905 | token = G_TOKEN_FLOAT; |
1906 | dotted_float = TRUE; |
1907 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1908 | goto number_parsing; |
1909 | |
1910 | case '$': |
1911 | if (!config->scan_hex_dollar) |
1912 | goto default_case; |
1913 | token = G_TOKEN_HEX; |
1914 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1915 | goto number_parsing; |
1916 | |
1917 | case '0': |
1918 | if (config->scan_octal) |
1919 | token = G_TOKEN_OCTAL; |
1920 | else |
1921 | token = G_TOKEN_INT; |
1922 | ch = g_scanner_peek_next_char (scanner); |
1923 | if (config->scan_hex && (ch == 'x' || ch == 'X')) |
1924 | { |
1925 | token = G_TOKEN_HEX; |
1926 | g_scanner_get_char (scanner, line_p, position_p); |
1927 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1928 | if (ch == 0) |
1929 | { |
1930 | token = G_TOKEN_ERROR; |
1931 | value.v_error = G_ERR_UNEXP_EOF; |
1932 | (*position_p)++; |
1933 | break; |
1934 | } |
1935 | if (g_scanner_char_2_num (c: ch, base: 16) < 0) |
1936 | { |
1937 | token = G_TOKEN_ERROR; |
1938 | value.v_error = G_ERR_DIGIT_RADIX; |
1939 | ch = 0; |
1940 | break; |
1941 | } |
1942 | } |
1943 | else if (config->scan_binary && (ch == 'b' || ch == 'B')) |
1944 | { |
1945 | token = G_TOKEN_BINARY; |
1946 | g_scanner_get_char (scanner, line_p, position_p); |
1947 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1948 | if (ch == 0) |
1949 | { |
1950 | token = G_TOKEN_ERROR; |
1951 | value.v_error = G_ERR_UNEXP_EOF; |
1952 | (*position_p)++; |
1953 | break; |
1954 | } |
1955 | if (g_scanner_char_2_num (c: ch, base: 10) < 0) |
1956 | { |
1957 | token = G_TOKEN_ERROR; |
1958 | value.v_error = G_ERR_NON_DIGIT_IN_CONST; |
1959 | ch = 0; |
1960 | break; |
1961 | } |
1962 | } |
1963 | else |
1964 | ch = '0'; |
1965 | G_GNUC_FALLTHROUGH; |
1966 | case '1': |
1967 | case '2': |
1968 | case '3': |
1969 | case '4': |
1970 | case '5': |
1971 | case '6': |
1972 | case '7': |
1973 | case '8': |
1974 | case '9': |
1975 | number_parsing: |
1976 | { |
1977 | gboolean in_number = TRUE; |
1978 | gchar *endptr; |
1979 | |
1980 | if (token == G_TOKEN_NONE) |
1981 | token = G_TOKEN_INT; |
1982 | |
1983 | gstring = g_string_new (init: dotted_float ? "0." : "" ); |
1984 | gstring = g_string_append_c (gstring, ch); |
1985 | |
1986 | do /* while (in_number) */ |
1987 | { |
1988 | gboolean is_E; |
1989 | |
1990 | is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E'); |
1991 | |
1992 | ch = g_scanner_peek_next_char (scanner); |
1993 | |
1994 | if (g_scanner_char_2_num (c: ch, base: 36) >= 0 || |
1995 | (config->scan_float && ch == '.') || |
1996 | (is_E && (ch == '+' || ch == '-'))) |
1997 | { |
1998 | ch = g_scanner_get_char (scanner, line_p, position_p); |
1999 | |
2000 | switch (ch) |
2001 | { |
2002 | case '.': |
2003 | if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL) |
2004 | { |
2005 | value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX; |
2006 | token = G_TOKEN_ERROR; |
2007 | in_number = FALSE; |
2008 | } |
2009 | else |
2010 | { |
2011 | token = G_TOKEN_FLOAT; |
2012 | gstring = g_string_append_c (gstring, ch); |
2013 | } |
2014 | break; |
2015 | |
2016 | case '0': |
2017 | case '1': |
2018 | case '2': |
2019 | case '3': |
2020 | case '4': |
2021 | case '5': |
2022 | case '6': |
2023 | case '7': |
2024 | case '8': |
2025 | case '9': |
2026 | gstring = g_string_append_c (gstring, ch); |
2027 | break; |
2028 | |
2029 | case '-': |
2030 | case '+': |
2031 | if (token != G_TOKEN_FLOAT) |
2032 | { |
2033 | token = G_TOKEN_ERROR; |
2034 | value.v_error = G_ERR_NON_DIGIT_IN_CONST; |
2035 | in_number = FALSE; |
2036 | } |
2037 | else |
2038 | gstring = g_string_append_c (gstring, ch); |
2039 | break; |
2040 | |
2041 | case 'e': |
2042 | case 'E': |
2043 | if ((token != G_TOKEN_HEX && !config->scan_float) || |
2044 | (token != G_TOKEN_HEX && |
2045 | token != G_TOKEN_OCTAL && |
2046 | token != G_TOKEN_FLOAT && |
2047 | token != G_TOKEN_INT)) |
2048 | { |
2049 | token = G_TOKEN_ERROR; |
2050 | value.v_error = G_ERR_NON_DIGIT_IN_CONST; |
2051 | in_number = FALSE; |
2052 | } |
2053 | else |
2054 | { |
2055 | if (token != G_TOKEN_HEX) |
2056 | token = G_TOKEN_FLOAT; |
2057 | gstring = g_string_append_c (gstring, ch); |
2058 | } |
2059 | break; |
2060 | |
2061 | default: |
2062 | if (token != G_TOKEN_HEX) |
2063 | { |
2064 | token = G_TOKEN_ERROR; |
2065 | value.v_error = G_ERR_NON_DIGIT_IN_CONST; |
2066 | in_number = FALSE; |
2067 | } |
2068 | else |
2069 | gstring = g_string_append_c (gstring, ch); |
2070 | break; |
2071 | } |
2072 | } |
2073 | else |
2074 | in_number = FALSE; |
2075 | } |
2076 | while (in_number); |
2077 | |
2078 | endptr = NULL; |
2079 | if (token == G_TOKEN_FLOAT) |
2080 | value.v_float = g_strtod (nptr: gstring->str, endptr: &endptr); |
2081 | else |
2082 | { |
2083 | guint64 ui64 = 0; |
2084 | switch (token) |
2085 | { |
2086 | case G_TOKEN_BINARY: |
2087 | ui64 = g_ascii_strtoull (nptr: gstring->str, endptr: &endptr, base: 2); |
2088 | break; |
2089 | case G_TOKEN_OCTAL: |
2090 | ui64 = g_ascii_strtoull (nptr: gstring->str, endptr: &endptr, base: 8); |
2091 | break; |
2092 | case G_TOKEN_INT: |
2093 | ui64 = g_ascii_strtoull (nptr: gstring->str, endptr: &endptr, base: 10); |
2094 | break; |
2095 | case G_TOKEN_HEX: |
2096 | ui64 = g_ascii_strtoull (nptr: gstring->str, endptr: &endptr, base: 16); |
2097 | break; |
2098 | default: ; |
2099 | } |
2100 | if (scanner->config->store_int64) |
2101 | value.v_int64 = ui64; |
2102 | else |
2103 | value.v_int = ui64; |
2104 | } |
2105 | if (endptr && *endptr) |
2106 | { |
2107 | token = G_TOKEN_ERROR; |
2108 | if (*endptr == 'e' || *endptr == 'E') |
2109 | value.v_error = G_ERR_NON_DIGIT_IN_CONST; |
2110 | else |
2111 | value.v_error = G_ERR_DIGIT_RADIX; |
2112 | } |
2113 | g_string_free (string: gstring, TRUE); |
2114 | gstring = NULL; |
2115 | ch = 0; |
2116 | } /* number_parsing:... */ |
2117 | break; |
2118 | |
2119 | default: |
2120 | default_case: |
2121 | { |
2122 | if (config->cpair_comment_single && |
2123 | ch == config->cpair_comment_single[0]) |
2124 | { |
2125 | token = G_TOKEN_COMMENT_SINGLE; |
2126 | in_comment_single = TRUE; |
2127 | gstring = g_string_new (NULL); |
2128 | ch = g_scanner_get_char (scanner, line_p, position_p); |
2129 | while (ch != 0) |
2130 | { |
2131 | if (ch == config->cpair_comment_single[1]) |
2132 | { |
2133 | in_comment_single = FALSE; |
2134 | ch = 0; |
2135 | break; |
2136 | } |
2137 | |
2138 | gstring = g_string_append_c (gstring, ch); |
2139 | ch = g_scanner_get_char (scanner, line_p, position_p); |
2140 | } |
2141 | /* ignore a missing newline at EOF for single line comments */ |
2142 | if (in_comment_single && |
2143 | config->cpair_comment_single[1] == '\n') |
2144 | in_comment_single = FALSE; |
2145 | } |
2146 | else if (config->scan_identifier && ch && |
2147 | strchr (s: config->cset_identifier_first, c: ch)) |
2148 | { |
2149 | identifier_precedence: |
2150 | |
2151 | if (config->cset_identifier_nth && ch && |
2152 | strchr (s: config->cset_identifier_nth, |
2153 | c: g_scanner_peek_next_char (scanner))) |
2154 | { |
2155 | token = G_TOKEN_IDENTIFIER; |
2156 | gstring = g_string_new (NULL); |
2157 | gstring = g_string_append_c (gstring, ch); |
2158 | do |
2159 | { |
2160 | ch = g_scanner_get_char (scanner, line_p, position_p); |
2161 | gstring = g_string_append_c (gstring, ch); |
2162 | ch = g_scanner_peek_next_char (scanner); |
2163 | } |
2164 | while (ch && strchr (s: config->cset_identifier_nth, c: ch)); |
2165 | ch = 0; |
2166 | } |
2167 | else if (config->scan_identifier_1char) |
2168 | { |
2169 | token = G_TOKEN_IDENTIFIER; |
2170 | value.v_identifier = g_new0 (gchar, 2); |
2171 | value.v_identifier[0] = ch; |
2172 | ch = 0; |
2173 | } |
2174 | } |
2175 | if (ch) |
2176 | { |
2177 | if (config->char_2_token) |
2178 | token = ch; |
2179 | else |
2180 | { |
2181 | token = G_TOKEN_CHAR; |
2182 | value.v_char = ch; |
2183 | } |
2184 | ch = 0; |
2185 | } |
2186 | } /* default_case:... */ |
2187 | break; |
2188 | } |
2189 | g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */ |
2190 | } |
2191 | while (ch != 0); |
2192 | |
2193 | if (in_comment_multi || in_comment_single || |
2194 | in_string_sq || in_string_dq) |
2195 | { |
2196 | token = G_TOKEN_ERROR; |
2197 | if (gstring) |
2198 | { |
2199 | g_string_free (string: gstring, TRUE); |
2200 | gstring = NULL; |
2201 | } |
2202 | (*position_p)++; |
2203 | if (in_comment_multi || in_comment_single) |
2204 | value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT; |
2205 | else /* (in_string_sq || in_string_dq) */ |
2206 | value.v_error = G_ERR_UNEXP_EOF_IN_STRING; |
2207 | } |
2208 | |
2209 | if (gstring) |
2210 | { |
2211 | value.v_string = g_string_free (string: gstring, FALSE); |
2212 | gstring = NULL; |
2213 | } |
2214 | |
2215 | if (token == G_TOKEN_IDENTIFIER) |
2216 | { |
2217 | if (config->scan_symbols) |
2218 | { |
2219 | GScannerKey *key; |
2220 | guint scope_id; |
2221 | |
2222 | scope_id = scanner->scope_id; |
2223 | key = g_scanner_lookup_internal (scanner, scope_id, symbol: value.v_identifier); |
2224 | if (!key && scope_id && scanner->config->scope_0_fallback) |
2225 | key = g_scanner_lookup_internal (scanner, scope_id: 0, symbol: value.v_identifier); |
2226 | |
2227 | if (key) |
2228 | { |
2229 | g_free (mem: value.v_identifier); |
2230 | token = G_TOKEN_SYMBOL; |
2231 | value.v_symbol = key->value; |
2232 | } |
2233 | } |
2234 | |
2235 | if (token == G_TOKEN_IDENTIFIER && |
2236 | config->scan_identifier_NULL && |
2237 | strlen (s: value.v_identifier) == 4) |
2238 | { |
2239 | gchar *null_upper = "NULL" ; |
2240 | gchar *null_lower = "null" ; |
2241 | |
2242 | if (scanner->config->case_sensitive) |
2243 | { |
2244 | if (value.v_identifier[0] == null_upper[0] && |
2245 | value.v_identifier[1] == null_upper[1] && |
2246 | value.v_identifier[2] == null_upper[2] && |
2247 | value.v_identifier[3] == null_upper[3]) |
2248 | token = G_TOKEN_IDENTIFIER_NULL; |
2249 | } |
2250 | else |
2251 | { |
2252 | if ((value.v_identifier[0] == null_upper[0] || |
2253 | value.v_identifier[0] == null_lower[0]) && |
2254 | (value.v_identifier[1] == null_upper[1] || |
2255 | value.v_identifier[1] == null_lower[1]) && |
2256 | (value.v_identifier[2] == null_upper[2] || |
2257 | value.v_identifier[2] == null_lower[2]) && |
2258 | (value.v_identifier[3] == null_upper[3] || |
2259 | value.v_identifier[3] == null_lower[3])) |
2260 | token = G_TOKEN_IDENTIFIER_NULL; |
2261 | } |
2262 | } |
2263 | } |
2264 | |
2265 | *token_p = token; |
2266 | *value_p = value; |
2267 | } |
2268 | |