1 | use std::{fmt, ops::Range}; |
2 | |
3 | use crate::{ |
4 | Buffer, ParseError, |
5 | err::{perr, ParseErrorKind::*}, |
6 | escape::{scan_raw_string, unescape_string}, |
7 | parse::first_byte_or_empty, |
8 | }; |
9 | |
10 | |
11 | /// A string or raw string literal, e.g. `"foo"`, `"Grüße"` or `r#"a🦊c"d🦀f"#`. |
12 | /// |
13 | /// See [the reference][ref] for more information. |
14 | /// |
15 | /// [ref]: https://doc.rust-lang.org/reference/tokens.html#string-literals |
16 | #[derive (Debug, Clone, PartialEq, Eq)] |
17 | pub struct StringLit<B: Buffer> { |
18 | /// The raw input. |
19 | raw: B, |
20 | |
21 | /// The string value (with all escapes unescaped), or `None` if there were |
22 | /// no escapes. In the latter case, the string value is in `raw`. |
23 | value: Option<String>, |
24 | |
25 | /// The number of hash signs in case of a raw string literal, or `None` if |
26 | /// it's not a raw string literal. |
27 | num_hashes: Option<u32>, |
28 | |
29 | /// Start index of the suffix or `raw.len()` if there is no suffix. |
30 | start_suffix: usize, |
31 | } |
32 | |
33 | impl<B: Buffer> StringLit<B> { |
34 | /// Parses the input as a (raw) string literal. Returns an error if the |
35 | /// input is invalid or represents a different kind of literal. |
36 | pub fn parse(input: B) -> Result<Self, ParseError> { |
37 | match first_byte_or_empty(&input)? { |
38 | b'r' | b'"' => { |
39 | let (value, num_hashes, start_suffix) = parse_impl(&input)?; |
40 | Ok(Self { raw: input, value, num_hashes, start_suffix }) |
41 | } |
42 | _ => Err(perr(0, InvalidStringLiteralStart)), |
43 | } |
44 | } |
45 | |
46 | /// Returns the string value this literal represents (where all escapes have |
47 | /// been turned into their respective values). |
48 | pub fn value(&self) -> &str { |
49 | self.value.as_deref().unwrap_or(&self.raw[self.inner_range()]) |
50 | } |
51 | |
52 | /// Like `value` but returns a potentially owned version of the value. |
53 | /// |
54 | /// The return value is either `Cow<'static, str>` if `B = String`, or |
55 | /// `Cow<'a, str>` if `B = &'a str`. |
56 | pub fn into_value(self) -> B::Cow { |
57 | let inner_range = self.inner_range(); |
58 | let Self { raw, value, .. } = self; |
59 | value.map(B::Cow::from).unwrap_or_else(|| raw.cut(inner_range).into_cow()) |
60 | } |
61 | |
62 | /// The optional suffix. Returns `""` if the suffix is empty/does not exist. |
63 | pub fn suffix(&self) -> &str { |
64 | &(*self.raw)[self.start_suffix..] |
65 | } |
66 | |
67 | /// Returns whether this literal is a raw string literal (starting with |
68 | /// `r`). |
69 | pub fn is_raw_string(&self) -> bool { |
70 | self.num_hashes.is_some() |
71 | } |
72 | |
73 | /// Returns the raw input that was passed to `parse`. |
74 | pub fn raw_input(&self) -> &str { |
75 | &self.raw |
76 | } |
77 | |
78 | /// Returns the raw input that was passed to `parse`, potentially owned. |
79 | pub fn into_raw_input(self) -> B { |
80 | self.raw |
81 | } |
82 | |
83 | /// The range within `self.raw` that excludes the quotes and potential `r#`. |
84 | fn inner_range(&self) -> Range<usize> { |
85 | match self.num_hashes { |
86 | None => 1..self.start_suffix - 1, |
87 | Some(n) => 1 + n as usize + 1..self.start_suffix - n as usize - 1, |
88 | } |
89 | } |
90 | } |
91 | |
92 | impl StringLit<&str> { |
93 | /// Makes a copy of the underlying buffer and returns the owned version of |
94 | /// `Self`. |
95 | pub fn into_owned(self) -> StringLit<String> { |
96 | StringLit { |
97 | raw: self.raw.to_owned(), |
98 | value: self.value, |
99 | num_hashes: self.num_hashes, |
100 | start_suffix: self.start_suffix, |
101 | } |
102 | } |
103 | } |
104 | |
105 | impl<B: Buffer> fmt::Display for StringLit<B> { |
106 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
107 | f.pad(&self.raw) |
108 | } |
109 | } |
110 | |
111 | /// Precondition: input has to start with either `"` or `r`. |
112 | #[inline (never)] |
113 | pub(crate) fn parse_impl(input: &str) -> Result<(Option<String>, Option<u32>, usize), ParseError> { |
114 | if input.starts_with('r' ) { |
115 | scan_raw_string::<char>(&input, 1) |
116 | .map(|(v: Option, hashes: u32, start_suffix: usize)| (v, Some(hashes), start_suffix)) |
117 | } else { |
118 | unescape_string::<char>(&input, 1) |
119 | .map(|(v: Option, start_suffix: usize)| (v, None, start_suffix)) |
120 | } |
121 | } |
122 | |
123 | |
// The unit tests for this module live in the separate `tests` submodule,
// which is only compiled when building tests.
#[cfg(test)]
mod tests;
126 | |