1use std::ffi::OsStr;
2
3pub trait OsStrExt: private::Sealed {
4 /// Converts to a string slice.
5 fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
6 /// Returns `true` if the given pattern matches a sub-slice of
7 /// this string slice.
8 ///
9 /// Returns `false` if it does not.
10 ///
11 /// # Examples
12 ///
13 /// ```rust
14 /// use clap_lex::OsStrExt as _;
15 /// let bananas = std::ffi::OsStr::new("bananas");
16 ///
17 /// assert!(bananas.contains("nana"));
18 /// assert!(!bananas.contains("apples"));
19 /// ```
20 fn contains(&self, needle: &str) -> bool;
21 /// Returns the byte index of the first character of this string slice that
22 /// matches the pattern.
23 ///
24 /// Returns [`None`] if the pattern doesn't match.
25 ///
26 /// # Examples
27 ///
28 /// ```rust
29 /// use clap_lex::OsStrExt as _;
30 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
31 ///
32 /// assert_eq!(s.find("L"), Some(0));
33 /// assert_eq!(s.find("é"), Some(14));
34 /// assert_eq!(s.find("par"), Some(17));
35 /// ```
36 ///
37 /// Not finding the pattern:
38 ///
39 /// ```rust
40 /// use clap_lex::OsStrExt as _;
41 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
42 ///
43 /// assert_eq!(s.find("1"), None);
44 /// ```
45 fn find(&self, needle: &str) -> Option<usize>;
46 /// Returns a string slice with the prefix removed.
47 ///
48 /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
49 /// in `Some`.
50 ///
51 /// If the string does not start with `prefix`, returns `None`.
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// use std::ffi::OsStr;
57 /// use clap_lex::OsStrExt as _;
58 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
59 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
60 /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
61 /// ```
62 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
63 /// Returns `true` if the given pattern matches a prefix of this
64 /// string slice.
65 ///
66 /// Returns `false` if it does not.
67 ///
68 /// # Examples
69 ///
70 /// ```
71 /// use clap_lex::OsStrExt as _;
72 /// let bananas = std::ffi::OsStr::new("bananas");
73 ///
74 /// assert!(bananas.starts_with("bana"));
75 /// assert!(!bananas.starts_with("nana"));
76 /// ```
77 fn starts_with(&self, prefix: &str) -> bool;
78 /// An iterator over substrings of this string slice, separated by
79 /// characters matched by a pattern.
80 ///
81 /// # Examples
82 ///
83 /// Simple patterns:
84 ///
85 /// ```
86 /// use std::ffi::OsStr;
87 /// use clap_lex::OsStrExt as _;
88 /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
89 /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
90 ///
91 /// let v: Vec<_> = OsStr::new("").split("X").collect();
92 /// assert_eq!(v, [OsStr::new("")]);
93 ///
94 /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
95 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
96 ///
97 /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
98 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
99 /// ```
100 ///
101 /// If a string contains multiple contiguous separators, you will end up
102 /// with empty strings in the output:
103 ///
104 /// ```
105 /// use std::ffi::OsStr;
106 /// use clap_lex::OsStrExt as _;
107 /// let x = OsStr::new("||||a||b|c");
108 /// let d: Vec<_> = x.split("|").collect();
109 ///
110 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
111 /// ```
112 ///
113 /// Contiguous separators are separated by the empty string.
114 ///
115 /// ```
116 /// use std::ffi::OsStr;
117 /// use clap_lex::OsStrExt as _;
118 /// let x = OsStr::new("(///)");
119 /// let d: Vec<_> = x.split("/").collect();
120 ///
121 /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
122 /// ```
123 ///
124 /// Separators at the start or end of a string are neighbored
125 /// by empty strings.
126 ///
127 /// ```
128 /// use std::ffi::OsStr;
129 /// use clap_lex::OsStrExt as _;
130 /// let d: Vec<_> = OsStr::new("010").split("0").collect();
131 /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
132 /// ```
133 ///
134 /// When the empty string is used as a separator, it panics
135 ///
136 /// ```should_panic
137 /// use std::ffi::OsStr;
138 /// use clap_lex::OsStrExt as _;
139 /// let f: Vec<_> = OsStr::new("rust").split("").collect();
140 /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
141 /// ```
142 ///
143 /// Contiguous separators can lead to possibly surprising behavior
144 /// when whitespace is used as the separator. This code is correct:
145 ///
146 /// ```
147 /// use std::ffi::OsStr;
148 /// use clap_lex::OsStrExt as _;
149 /// let x = OsStr::new(" a b c");
150 /// let d: Vec<_> = x.split(" ").collect();
151 ///
152 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
153 /// ```
154 ///
155 /// It does _not_ give you:
156 ///
157 /// ```,ignore
158 /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
159 /// ```
160 ///
161 /// Use [`split_whitespace`] for this behavior.
162 ///
163 /// [`split_whitespace`]: str::split_whitespace
164 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
165 /// Splits the string on the first occurrence of the specified delimiter and
166 /// returns prefix before delimiter and suffix after delimiter.
167 ///
168 /// # Examples
169 ///
170 /// ```
171 /// use std::ffi::OsStr;
172 /// use clap_lex::OsStrExt as _;
173 /// assert_eq!(OsStr::new("cfg").split_once("="), None);
174 /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
175 /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
176 /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
177 /// ```
178 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
179}
180
181impl OsStrExt for OsStr {
182 fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
183 let bytes = to_bytes(self);
184 std::str::from_utf8(bytes)
185 }
186
187 fn contains(&self, needle: &str) -> bool {
188 self.find(needle).is_some()
189 }
190
191 fn find(&self, needle: &str) -> Option<usize> {
192 let bytes = to_bytes(self);
193 (0..=self.len().checked_sub(needle.len())?)
194 .find(|&x| bytes[x..].starts_with(needle.as_bytes()))
195 }
196
197 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
198 let bytes = to_bytes(self);
199 bytes.strip_prefix(prefix.as_bytes()).map(|s| {
200 // SAFETY:
201 // - This came from `to_bytes`
202 // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
203 unsafe { to_os_str_unchecked(s) }
204 })
205 }
206 fn starts_with(&self, prefix: &str) -> bool {
207 let bytes = to_bytes(self);
208 bytes.starts_with(prefix.as_bytes())
209 }
210
211 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
212 assert_ne!(needle, "");
213 Split {
214 haystack: Some(self),
215 needle,
216 }
217 }
218
219 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
220 let start = self.find(needle)?;
221 let end = start + needle.len();
222 let haystack = to_bytes(self);
223 let first = &haystack[0..start];
224 let second = &haystack[end..];
225 // SAFETY:
226 // - This came from `to_bytes`
227 // - Since `needle` is `&str`, any split will be along UTF-8 boundarie
228 unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
229 }
230}
231
/// Crate-private home of the marker trait used to seal `OsStrExt`.
mod private {
    use std::ffi::OsStr;

    /// Marker trait; only types implementing it here can implement the sealed trait.
    pub trait Sealed {}

    impl Sealed for OsStr {}
}
237
/// Borrow the raw bytes backing an `OsStr`
///
/// The encoding of non-UTF8 content is not defined, so the returned bytes are only meaningful
/// when compared against 7-bit ASCII or a `&str`
///
/// # Compatibility
///
/// How non-UTF8 bytes are encoded is not guaranteed, even between versions of this crate
/// (since it depends on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
    // SAFETY:
    // - The input and output lifetimes are the same
    // - The types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The main caveat is that the encoding of invalid surrogate code points is not
    //   guaranteed, which does not matter for how the bytes are used here
    //
    // Native support was proposed in https://github.com/rust-lang/rust/pull/95290
    // but it's in limbo
    unsafe { std::mem::transmute::<&OsStr, &[u8]>(s) }
}
258
/// Reinterpret raw bytes as an `OsStr`
///
/// # Safety
///
/// - The `&[u8]` must either be the bytes of a `&str` or have originated from `to_bytes`
///   within the same binary
/// - Any splits of the original `&[u8]` must have been made along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
    // SAFETY:
    // - The input and output lifetimes are the same
    // - The types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The main caveat is that the encoding of invalid surrogate code points is not
    //   guaranteed, which does not matter for how the bytes are used here
    //
    // Native support was proposed in https://github.com/rust-lang/rust/pull/95290
    // but it's in limbo
    std::mem::transmute::<&[u8], &OsStr>(s)
}
276
/// Iterator over the pieces of an [`OsStr`] separated by a `&str` needle.
///
/// Created by `OsStrExt::split`.
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// Portion of the input that has not been yielded yet; `None` once iteration has finished.
    haystack: Option<&'s OsStr>,
    /// Separator the input is split on.
    needle: &'n str,
}
281
282impl<'s, 'n> Iterator for Split<'s, 'n> {
283 type Item = &'s OsStr;
284
285 fn next(&mut self) -> Option<Self::Item> {
286 let haystack = self.haystack?;
287 match haystack.split_once(self.needle) {
288 Some((first, second)) => {
289 if !haystack.is_empty() {
290 debug_assert_ne!(haystack, second);
291 }
292 self.haystack = Some(second);
293 Some(first)
294 }
295 None => {
296 self.haystack = None;
297 Some(haystack)
298 }
299 }
300 }
301}
302
303/// Split an `OsStr`
304///
305/// # Safety
306///
307/// `index` must be at a valid UTF-8 boundary
308pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
309 let bytes = to_bytes(os);
310 let (first, second) = bytes.split_at(index);
311 (to_os_str_unchecked(first), to_os_str_unchecked(second))
312}
313