//! Code for parsing resource-manager databases, entries, and queries
2
3use super::{Binding, Component, Entry};
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6
// =======================
// Common helper functions
// =======================
10
/// Check if a character (well, `u8`) is an ASCII octal digit, i.e. `0` through `7`.
fn is_octal_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'7')
}
15
/// Find the longest prefix of the given data where the given callback returns true.
///
/// Returns `(prefix, remaining)`: `prefix` is the leading run of bytes that all satisfy
/// `matcher`, and `remaining` starts at the first byte that does not. If every byte matches,
/// `prefix` is all of `data` and `remaining` is empty.
fn parse_with_matcher<M>(data: &[u8], matcher: M) -> (&[u8], &[u8])
where
    M: Fn(u8) -> bool,
{
    let end = data
        .iter()
        .position(|&c| !matcher(c))
        .unwrap_or(data.len());
    data.split_at(end)
}
29
/// Check if a character is allowed in a quark name.
///
/// Allowed are ASCII letters, ASCII digits, `-` and `_`.
fn allowed_in_quark_name(c: u8) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, b'-' | b'_')
}
34
35/// Find the longest prefix satisfying allowed_in_quark_name().
36/// This returns (Some(prefix), remaining) if a prefix is found, else (None, data).
37fn next_component(data: &[u8]) -> (Option<&[u8]>, &[u8]) {
38 let (prefix: &[u8], remaining: &[u8]) = parse_with_matcher(data, matcher:allowed_in_quark_name);
39 match prefix {
40 [] => (None, remaining),
41 prefix: &[u8] => (Some(prefix), remaining),
42 }
43}
44
// =========================
// Parser for resource files
// =========================
48
49/// Skip to the next end of line in the given data
50fn skip_to_eol(data: &[u8]) -> &[u8] {
51 parse_with_matcher(data, |c: u8| c != b'\n').1
52}
53
54/// Skip all spaces in the given data
55fn skip_spaces(data: &[u8]) -> &[u8] {
56 parse_with_matcher(data, |c: u8| c == b' ').1
57}
58
/// Skip the given text. Returns `None` if the text was not found.
///
/// On success, the returned slice is `data` with the leading `text` removed.
fn skip_text<'a>(data: &'a [u8], text: &[u8]) -> Option<&'a [u8]> {
    data.strip_prefix(text)
}
67
68/// Parse a single `Component` from the data. This can either be a wildcard ("?") or a
69/// component made up of characters accepted by `allowed_in_quark_name`.
70fn next_component_name(data: &[u8]) -> (Option<Component>, &[u8]) {
71 if data.first() == Some(&b'?') {
72 (Some(Component::Wildcard), &data[1..])
73 } else {
74 let (comp: Option<&[u8]>, remaining: &[u8]) = next_component(data);
75 let comp: Option = comp.map(|s: &[u8]| {
76 let s: &str = std::str::from_utf8(s).expect(msg:"ascii-only");
77 Component::Normal(s.to_string())
78 });
79 (comp, remaining)
80 }
81}
82
83/// Parse a resource like "foo.?*baz" (wildcards allowed)
84fn parse_components(data: &[u8]) -> (Vec<(Binding, Component)>, &[u8]) {
85 fn parse_binding(mut data: &[u8]) -> (Binding, &[u8]) {
86 let mut binding = Binding::Tight;
87 loop {
88 match data.first() {
89 Some(&b'*') => binding = Binding::Loose,
90 Some(&b'.') => {}
91 _ => break,
92 }
93 data = &data[1..];
94 }
95 (binding, data)
96 }
97
98 let mut data = data;
99 let mut result = Vec::new();
100 loop {
101 let (binding, remaining) = parse_binding(data);
102 if let (Some(component), remaining) = next_component_name(remaining) {
103 data = remaining;
104 result.push((binding, component));
105 } else {
106 break;
107 }
108 }
109 (result, data)
110}
111
112/// Parse a full entry from the data. This begins with components (see `parse_components()`),
113/// then after a colon (":") comes the value. The value may contain escape sequences.
114fn parse_entry(data: &[u8]) -> (Result<Entry, ()>, &[u8]) {
115 let (components, data) = parse_components(data);
116
117 match components.last() {
118 // Empty components are not allowed
119 None => return (Err(()), skip_to_eol(data)),
120 // The last component may not be a wildcard
121 Some((_, Component::Wildcard)) => return (Err(()), skip_to_eol(data)),
122 _ => {}
123 }
124
125 let data = skip_spaces(data);
126
127 // next comes a colon
128 let data = match data.split_first() {
129 Some((&b':', data)) => data,
130 _ => return (Err(()), skip_to_eol(data)),
131 };
132
133 // skip more spaces and let \ escape line breaks
134 let mut data = data;
135 loop {
136 let (_, remaining) = parse_with_matcher(data, |c| c == b' ' || c == b'\t');
137 if remaining.get(..2) == Some(&b"\\\n"[..]) {
138 data = &remaining[2..];
139 } else {
140 data = remaining;
141 break;
142 }
143 }
144
145 // Parse the value, decoding escape sequences. The most complicated case are octal escape
146 // sequences like \123.
147 let mut value = Vec::new();
148 let mut index = 0;
149 let mut octal = None;
150 while let Some(&b) = data.get(index) {
151 index += 1;
152 if b == b'\n' {
153 break;
154 }
155 if let Some(oct) = octal {
156 if is_octal_digit(b) {
157 // We are currently parsing an octal; add the new character
158 match oct {
159 (x, None) => octal = Some((x, Some(b))),
160 (x, Some(y)) => {
161 let (x, y, z) = (x - b'0', y - b'0', b - b'0');
162 let decoded = (x * 8 + y) * 8 + z;
163 value.push(decoded);
164 octal = None;
165 }
166 }
167 continue;
168 } else {
169 // Not an octal sequence; add the collected characters to the output
170 value.push(b'\\');
171 value.push(oct.0);
172 if let Some(oct2) = oct.1 {
173 value.push(oct2);
174 }
175 octal = None;
176
177 // Fall through to the parsing code below
178 }
179 }
180 if b != b'\\' {
181 value.push(b);
182 } else {
183 match data.get(index) {
184 None => {
185 value.push(b);
186 // Keep index as-is. This is to counter the += 1 below.
187 index -= 1;
188 }
189 Some(b' ') => value.push(b' '),
190 Some(b'\t') => value.push(b'\t'),
191 Some(b'n') => value.push(b'\n'),
192 Some(b'\\') => value.push(b'\\'),
193 Some(b'\n') => { /* Continue parsing next line */ }
194 Some(&x) if is_octal_digit(x) => octal = Some((x, None)),
195 Some(&x) => {
196 value.push(b);
197 value.push(x);
198 }
199 }
200 index += 1;
201 }
202 }
203
204 let entry = Entry { components, value };
205 (Ok(entry), &data[index..])
206}
207
208/// Parse the contents of a database
209pub(crate) fn parse_database<F>(mut data: &[u8], result: &mut Vec<Entry>, mut include_callback: F)
210where
211 for<'r> F: FnMut(&'r [u8], &mut Vec<Entry>),
212{
213 // Iterate over lines
214 while let Some(first) = data.first() {
215 match first {
216 // Skip empty lines
217 b'\n' => data = &data[1..],
218 // Comment, skip the line
219 b'!' => data = skip_to_eol(data),
220 b'#' => {
221 let remaining = skip_spaces(&data[1..]);
222 // Skip to the next line for the next loop iteration. The rest of the code here
223 // tried to parse the line.
224 data = skip_to_eol(remaining);
225
226 // Only #include is defined
227 if let Some(remaining) = skip_text(remaining, b"include") {
228 let (_, remaining) = parse_with_matcher(remaining, |c| c == b' ');
229 // Find the text enclosed in quotation marks
230 if let Some(b'\"') = remaining.first() {
231 let (file, remaining) =
232 parse_with_matcher(&remaining[1..], |c| c != b'"' && c != b'\n');
233 if let Some(b'\"') = remaining.first() {
234 // Okay, we found a well-formed include directive.
235 include_callback(file, result);
236 }
237 }
238 }
239 }
240 _ => {
241 let (entry, remaining) = parse_entry(data);
242 data = remaining;
243 // Add the entry to the result if we parsed one; ignore errors
244 result.extend(entry.ok());
245 }
246 }
247 }
248}
249
250/// Parse a resource query like "foo.bar.baz" (no wildcards allowed, no bindings allowed)
251pub(crate) fn parse_query(data: &[u8]) -> Option<Vec<String>> {
252 let mut data: &[u8] = data;
253 let mut result: Vec = Vec::new();
254 while let (Some(component: &[u8]), remaining: &[u8]) = next_component(data) {
255 data = remaining;
256 while let Some(&b'.') = data.first() {
257 data = &data[1..];
258 }
259 let component: &str = std::str::from_utf8(component).expect(msg:"ascii-only");
260 result.push(component.to_string());
261 }
262 if data.is_empty() {
263 Some(result)
264 } else {
265 None
266 }
267}
268
#[cfg(test)]
mod test {
    use super::{parse_database, parse_entry, parse_query, Binding, Component, Entry};
    use alloc::string::{String, ToString};
    use alloc::vec;
    use alloc::vec::Vec;
    use std::eprintln;

    // Most tests in here are based on [1], which is: Copyright © 2016 Ingo Bürk
    // [1]: https://github.com/Airblader/xcb-util-xrm/blob/master/tests/tests_parser.c

    /// A tightly bound, named component.
    fn tight(name: &str) -> (Binding, Component) {
        (Binding::Tight, Component::Normal(name.to_string()))
    }

    /// A loosely bound, named component.
    fn loose(name: &str) -> (Binding, Component) {
        (Binding::Loose, Component::Normal(name.to_string()))
    }

    #[test]
    fn test_parse_query_success() {
        let tests = [
            (
                &b"First.second"[..],
                vec!["First".to_string(), "second".to_string()],
            ),
            (b"", Vec::new()),
            (
                b"urxvt.scrollBar_right",
                vec!["urxvt".to_string(), "scrollBar_right".to_string()],
            ),
            (
                b"urxvt.Control-Shift-Up",
                vec!["urxvt".to_string(), "Control-Shift-Up".to_string()],
            ),
        ];
        for (data, expected) in tests.iter() {
            let result = parse_query(data);
            assert_eq!(result.as_ref(), Some(expected), "while parsing {:?}", data);
        }
    }

    #[test]
    fn test_parse_query_error() {
        let tests = [
            &b"First.second: on"[..],
            b"First*second",
            b"First.?.second",
            b"*second",
            b"?.second",
        ];
        for data in tests.iter() {
            let result = parse_query(data);
            assert!(
                result.is_none(),
                "Unexpected success parsing '{:?}': {:?}",
                data,
                result,
            );
        }
    }

    #[test]
    fn test_parse_entry_success() {
        // Tests for the component part; the value is always "1"
        let component_tests = [
            // Basics
            (&b"First: 1"[..], vec![tight("First")]),
            (b"First.second: 1", vec![tight("First"), tight("second")]),
            (b"First..second: 1", vec![tight("First"), tight("second")]),
            // Wildcards
            (
                b"?.second: 1",
                vec![(Binding::Tight, Component::Wildcard), tight("second")],
            ),
            (
                b"First.?.third: 1",
                vec![
                    tight("First"),
                    (Binding::Tight, Component::Wildcard),
                    tight("third"),
                ],
            ),
            // Loose bindings
            (b"*second: 1", vec![loose("second")]),
            (b"First*third: 1", vec![tight("First"), loose("third")]),
            (b"First**third: 1", vec![tight("First"), loose("third")]),
            // Combinations
            (
                b"First*?.fourth: 1",
                vec![
                    tight("First"),
                    (Binding::Loose, Component::Wildcard),
                    tight("fourth"),
                ],
            ),
        ];
        for (data, resource) in component_tests.iter() {
            run_entry_test(data, resource, b"1");
        }

        // Tests for the value part; the resource is always the single component "First"
        let value_tests = [
            // Values
            (&b"First: 1337"[..], &b"1337"[..]),
            (b"First: -1337", b"-1337"),
            (b"First: 13.37", b"13.37"),
            (b"First: value", b"value"),
            (b"First: #abcdef", b"#abcdef"),
            (b"First: { key: 'value' }", b"{ key: 'value' }"),
            (b"First: x?y", b"x?y"),
            (b"First: x*y", b"x*y"),
            // Whitespace
            (b"First: x", b"x"),
            (b"First: x ", b"x "),
            (b"First: x ", b"x "),
            (b"First:x", b"x"),
            (b"First: \t x", b"x"),
            (b"First: \t x \t", b"x \t"),
            // Special characters
            (b"First: \\ x", b" x"),
            (b"First: x\\ x", b"x x"),
            (b"First: \\\tx", b"\tx"),
            (b"First: \\011x", b"\tx"),
            (b"First: x\\\\x", b"x\\x"),
            (b"First: x\\nx", b"x\nx"),
            (b"First: \\080", b"\\080"),
            (b"First: \\00a", b"\\00a"),
            // Own tests
            // Some more escape tests, e.g. escape at end of input
            (b"First: \\", b"\\"),
            (b"First: \\xxx", b"\\xxx"),
            (b"First: \\1xx", b"\\1xx"),
            (b"First: \\10x", b"\\10x"),
            (b"First: \\100", b"@"),
            (b"First: \\n", b"\n"),
        ];
        for (data, value) in value_tests.iter() {
            run_entry_test(data, &[tight("First")], value);
        }
    }

    #[test]
    fn test_parse_entry_error() {
        let tests = [
            &b": 1"[..],
            b"?: 1",
            b"First",
            b"First second",
            b"First.?: 1",
            b"F\xc3\xb6rst: 1",
            b"F~rst: 1",
        ];
        for data in tests.iter() {
            match parse_entry(data) {
                (Ok(v), _) => panic!("Unexpected success parsing '{:?}': {:?}", data, v),
                (Err(_), b"") => {}
                (Err(_), remaining) => panic!(
                    "Unexpected remaining data parsing '{:?}': {:?}",
                    data, remaining
                ),
            }
        }
    }

    #[test]
    fn test_parse_large_value() {
        let value = vec![b'x'; 1025];
        let mut data = b"First: ".to_vec();
        data.extend(&value);
        run_entry_test(&data, &[tight("First")], &value);
    }

    #[test]
    fn test_parse_large_resource() {
        let x = vec![b'x'; 1025];
        let y = vec![b'y'; 1025];
        let mut data = x.clone();
        data.push(b'.');
        data.extend(&y);
        data.extend(b": 1");
        let resource = [
            (
                Binding::Tight,
                Component::Normal(String::from_utf8(x).unwrap()),
            ),
            (
                Binding::Tight,
                Component::Normal(String::from_utf8(y).unwrap()),
            ),
        ];
        run_entry_test(&data, &resource, b"1");
    }

    #[test]
    fn test_parse_database() {
        let expected_entry = Entry {
            components: vec![tight("First")],
            value: b"1".to_vec(),
        };
        let tests = [
            (&b"First: 1\n\n\n"[..], vec![expected_entry.clone()]),
            (b"First: 1\n!Foo", vec![expected_entry.clone()]),
            (b"!First: 1\nbar\n\n\n", Vec::new()),
            (b"!bar\nFirst: 1\nbaz", vec![expected_entry.clone()]),
            (b"First :\\\n \\\n\\\n1\n", vec![expected_entry]),
            (
                b"First: \\\n 1\\\n2\n",
                vec![Entry {
                    components: vec![tight("First")],
                    value: b"12".to_vec(),
                }],
            ),
        ];
        let mut success = true;
        for (data, expected) in tests.iter() {
            let mut result = Vec::new();
            parse_database(data, &mut result, |_, _| unreachable!());
            if &result != expected {
                eprintln!("While testing {:?}", data);
                eprintln!("Expected: {:?}", expected);
                eprintln!("Got: {:?}", result);
                eprintln!();
                success = false;
            }
        }
        if !success {
            panic!()
        }
    }

    #[test]
    fn test_include_parsing() {
        let tests = [
            (&b"#include\"test\""[..], vec![&b"test"[..]]),
            (b"#include\"test", Vec::new()),
            (b"#include\"", Vec::new()),
            (b"#include", Vec::new()),
            (b"#includ", Vec::new()),
            (b"#in", Vec::new()),
            (b"# foo", Vec::new()),
            (
                b"# include \" test \" \n#include \"foo\"",
                vec![b" test ", b"foo"],
            ),
        ];
        let mut success = true;
        for (data, expected) in tests.iter() {
            let mut result = Vec::new();
            let mut calls = Vec::new();
            parse_database(data, &mut result, |file, _| calls.push(file.to_vec()));
            if &calls != expected {
                eprintln!("While testing {:?}", data);
                eprintln!("Expected: {:?}", expected);
                eprintln!("Got: {:?}", calls);
                eprintln!();
                success = false;
            }
        }
        if !success {
            panic!()
        }
    }

    #[test]
    fn test_include_additions() {
        let entry = Entry {
            components: Vec::new(),
            value: b"42".to_vec(),
        };
        let mut result = Vec::new();
        parse_database(b"#include\"test\"", &mut result, |file, result| {
            assert_eq!(file, b"test");
            result.push(entry.clone());
        });
        assert_eq!(result, [entry]);
    }

    /// Parse `data` as a single entry and check that all input was consumed and that the
    /// resulting components and value are the expected ones.
    fn run_entry_test(data: &[u8], resource: &[(Binding, Component)], value: &[u8]) {
        match parse_entry(data) {
            (Ok(result), remaining) => {
                assert_eq!(remaining, b"", "failed to parse {:?}", data);
                assert_eq!(
                    result.components, resource,
                    "incorrect components when parsing {:?}",
                    data
                );
                assert_eq!(
                    result.value, value,
                    "incorrect value when parsing {:?}",
                    data
                );
            }
            (Err(err), _) => panic!("Failed to parse '{:?}': {:?}", data, err),
        }
    }
}
705