1 | use std::ffi::OsStr; |
2 | |
/// String-like helpers for [`OsStr`].
///
/// The trait is sealed through `private::Sealed`; the only implementation visible in this file
/// is the one for [`OsStr`]. Every needle / prefix is a `&str`.
pub trait OsStrExt: private::Sealed {
    /// Converts to a string slice.
    ///
    /// # Errors
    ///
    /// Returns a [`std::str::Utf8Error`] when the underlying bytes are not valid UTF-8.
    fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
    /// Returns `true` if the given pattern matches a sub-slice of
    /// this string slice.
    ///
    /// Returns `false` if it does not.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use clap_lex::OsStrExt as _;
    /// let bananas = std::ffi::OsStr::new("bananas");
    ///
    /// assert!(bananas.contains("nana"));
    /// assert!(!bananas.contains("apples"));
    /// ```
    fn contains(&self, needle: &str) -> bool;
    /// Returns the byte index of the first character of this string slice that
    /// matches the pattern.
    ///
    /// Returns [`None`] if the pattern doesn't match.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use clap_lex::OsStrExt as _;
    /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
    ///
    /// assert_eq!(s.find("L"), Some(0));
    /// assert_eq!(s.find("é"), Some(14));
    /// assert_eq!(s.find("par"), Some(17));
    /// ```
    ///
    /// Not finding the pattern:
    ///
    /// ```rust
    /// use clap_lex::OsStrExt as _;
    /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
    ///
    /// assert_eq!(s.find("1"), None);
    /// ```
    fn find(&self, needle: &str) -> Option<usize>;
    /// Returns a string slice with the prefix removed.
    ///
    /// If the string starts with the pattern `prefix`, returns the substring after the prefix,
    /// wrapped in `Some`.
    ///
    /// If the string does not start with `prefix`, returns `None`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
    /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
    /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
    /// ```
    fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
    /// Returns `true` if the given pattern matches a prefix of this
    /// string slice.
    ///
    /// Returns `false` if it does not.
    ///
    /// # Examples
    ///
    /// ```
    /// use clap_lex::OsStrExt as _;
    /// let bananas = std::ffi::OsStr::new("bananas");
    ///
    /// assert!(bananas.starts_with("bana"));
    /// assert!(!bananas.starts_with("nana"));
    /// ```
    fn starts_with(&self, prefix: &str) -> bool;
    /// An iterator over substrings of this string slice, separated by
    /// occurrences of the `needle` string.
    ///
    /// # Panics
    ///
    /// Panics if `needle` is the empty string.
    ///
    /// # Examples
    ///
    /// Simple patterns:
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
    /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
    ///
    /// let v: Vec<_> = OsStr::new("").split("X").collect();
    /// assert_eq!(v, [OsStr::new("")]);
    ///
    /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
    /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
    ///
    /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
    /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
    /// ```
    ///
    /// If a string contains multiple contiguous separators, you will end up
    /// with empty strings in the output:
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let x = OsStr::new("||||a||b|c");
    /// let d: Vec<_> = x.split("|").collect();
    ///
    /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
    /// ```
    ///
    /// Contiguous separators are separated by the empty string.
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let x = OsStr::new("(///)");
    /// let d: Vec<_> = x.split("/").collect();
    ///
    /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
    /// ```
    ///
    /// Separators at the start or end of a string are neighbored
    /// by empty strings.
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let d: Vec<_> = OsStr::new("010").split("0").collect();
    /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
    /// ```
    ///
    /// When the empty string is used as a separator, it panics:
    ///
    /// ```should_panic
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let f: Vec<_> = OsStr::new("rust").split("").collect();
    /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
    /// ```
    ///
    /// Contiguous separators can lead to possibly surprising behavior
    /// when whitespace is used as the separator. This code is correct:
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// let x = OsStr::new("    a  b c");
    /// let d: Vec<_> = x.split(" ").collect();
    ///
    /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
    /// ```
    ///
    /// It does _not_ give you:
    ///
    /// ```ignore
    /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
    /// ```
    ///
    /// Use [`split_whitespace`] for this behavior.
    ///
    /// [`split_whitespace`]: str::split_whitespace
    fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
    /// Splits the string on the first occurrence of the specified delimiter and
    /// returns the prefix before the delimiter and the suffix after it.
    ///
    /// Returns `None` when the delimiter does not occur.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ffi::OsStr;
    /// use clap_lex::OsStrExt as _;
    /// assert_eq!(OsStr::new("cfg").split_once("="), None);
    /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
    /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
    /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
    /// ```
    fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
}
180 | |
181 | impl OsStrExt for OsStr { |
182 | fn try_str(&self) -> Result<&str, std::str::Utf8Error> { |
183 | let bytes = to_bytes(self); |
184 | std::str::from_utf8(bytes) |
185 | } |
186 | |
187 | fn contains(&self, needle: &str) -> bool { |
188 | self.find(needle).is_some() |
189 | } |
190 | |
191 | fn find(&self, needle: &str) -> Option<usize> { |
192 | let bytes = to_bytes(self); |
193 | (0..=self.len().checked_sub(needle.len())?) |
194 | .find(|&x| bytes[x..].starts_with(needle.as_bytes())) |
195 | } |
196 | |
197 | fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> { |
198 | let bytes = to_bytes(self); |
199 | bytes.strip_prefix(prefix.as_bytes()).map(|s| { |
200 | // SAFETY: |
201 | // - This came from `to_bytes` |
202 | // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie |
203 | unsafe { to_os_str_unchecked(s) } |
204 | }) |
205 | } |
206 | fn starts_with(&self, prefix: &str) -> bool { |
207 | let bytes = to_bytes(self); |
208 | bytes.starts_with(prefix.as_bytes()) |
209 | } |
210 | |
211 | fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> { |
212 | assert_ne!(needle, "" ); |
213 | Split { |
214 | haystack: Some(self), |
215 | needle, |
216 | } |
217 | } |
218 | |
219 | fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> { |
220 | let start = self.find(needle)?; |
221 | let end = start + needle.len(); |
222 | let haystack = to_bytes(self); |
223 | let first = &haystack[0..start]; |
224 | let second = &haystack[end..]; |
225 | // SAFETY: |
226 | // - This came from `to_bytes` |
227 | // - Since `needle` is `&str`, any split will be along UTF-8 boundarie |
228 | unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) } |
229 | } |
230 | } |
231 | |
// Holds the marker trait used as the supertrait bound of `OsStrExt`.
// The module itself is not `pub`, so `Sealed` is only nameable through paths inside this crate.
mod private {
    /// Marker trait with no methods; `OsStrExt` requires it as a supertrait.
    pub trait Sealed {}

    // `OsStr` is the only type given the marker in this file.
    impl Sealed for std::ffi::OsStr {}
}
237 | |
/// Allow access to raw bytes
///
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since it's dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
    // `OsStr::as_encoded_bytes` (stable since Rust 1.74) is the documented, safe way to view
    // the platform encoding as bytes. It replaces the previous `transmute`, which relied on
    // `OsStr` having the same layout as `[u8]` -- something the standard library does not
    // promise.
    s.as_encoded_bytes()
}
258 | |
/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - `&[u8]` must either be a `&str` or have originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
    // `OsStr::from_encoded_bytes_unchecked` (stable since Rust 1.74) is the documented inverse
    // of `OsStr::as_encoded_bytes`. It replaces the previous `transmute`, which relied on
    // `OsStr` having the same layout as `[u8]` -- something the standard library does not
    // promise.
    //
    // SAFETY: the caller guarantees (see `# Safety` above) that the bytes are either valid
    // UTF-8 or came from this binary's own `OsStr` encoding and were only split along UTF-8
    // boundaries, which is the contract `from_encoded_bytes_unchecked` requires.
    unsafe { OsStr::from_encoded_bytes_unchecked(s) }
}
276 | |
/// Iterator over the pieces of an [`OsStr`] separated by a `&str` needle.
///
/// Created by [`OsStrExt::split`]; yields `&OsStr` items borrowed from the original string.
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// The part of the input that has not been yielded yet; `None` once the final piece has
    /// been returned.
    haystack: Option<&'s OsStr>,
    /// The separator searched for on every step.
    needle: &'n str,
}
281 | |
282 | impl<'s, 'n> Iterator for Split<'s, 'n> { |
283 | type Item = &'s OsStr; |
284 | |
285 | fn next(&mut self) -> Option<Self::Item> { |
286 | let haystack = self.haystack?; |
287 | match haystack.split_once(self.needle) { |
288 | Some((first, second)) => { |
289 | if !haystack.is_empty() { |
290 | debug_assert_ne!(haystack, second); |
291 | } |
292 | self.haystack = Some(second); |
293 | Some(first) |
294 | } |
295 | None => { |
296 | self.haystack = None; |
297 | Some(haystack) |
298 | } |
299 | } |
300 | } |
301 | } |
302 | |
303 | /// Split an `OsStr` |
304 | /// |
305 | /// # Safety |
306 | /// |
307 | /// `index` must be at a valid UTF-8 boundary |
308 | pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) { |
309 | let bytes = to_bytes(os); |
310 | let (first, second) = bytes.split_at(index); |
311 | (to_os_str_unchecked(first), to_os_str_unchecked(second)) |
312 | } |
313 | |