ext.rs - Codebrowser

1	use std::ffi::OsStr;
2
3	pub trait OsStrExt: private::Sealed {
4	/// Converts to a string slice.
5	fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
6	/// Returns `true` if the given pattern matches a sub-slice of
7	/// this string slice.
8	///
9	/// Returns `false` if it does not.
10	///
11	/// # Examples
12	///
13	/// ```rust
14	/// use clap_lex::OsStrExt as _;
15	/// let bananas = std::ffi::OsStr::new("bananas");
16	///
17	/// assert!(bananas.contains("nana"));
18	/// assert!(!bananas.contains("apples"));
19	/// ```
20	fn contains(&self, needle: &str) -> bool;
21	/// Returns the byte index of the first character of this string slice that
22	/// matches the pattern.
23	///
24	/// Returns [`None`] if the pattern doesn't match.
25	///
26	/// # Examples
27	///
28	/// ```rust
29	/// use clap_lex::OsStrExt as _;
30	/// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
31	///
32	/// assert_eq!(s.find("L"), Some(`0`));
33	/// assert_eq!(s.find("é"), Some(`14`));
34	/// assert_eq!(s.find("par"), Some(`17`));
35	/// ```
36	///
37	/// Not finding the pattern:
38	///
39	/// ```rust
40	/// use clap_lex::OsStrExt as _;
41	/// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
42	///
43	/// assert_eq!(s.find("1"), None);
44	/// ```
45	fn find(&self, needle: &str) -> Option<usize>;
46	/// Returns a string slice with the prefix removed.
47	///
48	/// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
49	/// in `Some`.
50	///
51	/// If the string does not start with `prefix`, returns `None`.
52	///
53	/// # Examples
54	///
55	/// ```
56	/// use std::ffi::OsStr;
57	/// use clap_lex::OsStrExt as _;
58	/// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
59	/// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
60	/// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
61	/// ```
62	fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
63	/// Returns `true` if the given pattern matches a prefix of this
64	/// string slice.
65	///
66	/// Returns `false` if it does not.
67	///
68	/// # Examples
69	///
70	/// ```
71	/// use clap_lex::OsStrExt as _;
72	/// let bananas = std::ffi::OsStr::new("bananas");
73	///
74	/// assert!(bananas.starts_with("bana"));
75	/// assert!(!bananas.starts_with("nana"));
76	/// ```
77	fn starts_with(&self, prefix: &str) -> bool;
78	/// An iterator over substrings of this string slice, separated by
79	/// characters matched by a pattern.
80	///
81	/// # Examples
82	///
83	/// Simple patterns:
84	///
85	/// ```
86	/// use std::ffi::OsStr;
87	/// use clap_lex::OsStrExt as _;
88	/// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
89	/// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
90	///
91	/// let v: Vec<_> = OsStr::new("").split("X").collect();
92	/// assert_eq!(v, [OsStr::new("")]);
93	///
94	/// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
95	/// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
96	///
97	/// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
98	/// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
99	/// ```
100	///
101	/// If a string contains multiple contiguous separators, you will end up
102	/// with empty strings in the output:
103	///
104	/// ```
105	/// use std::ffi::OsStr;
106	/// use clap_lex::OsStrExt as _;
107	/// let x = OsStr::new("\|\|\|\|a\|\|b\|c");
108	/// let d: Vec<_> = x.split("\|").collect();
109	///
110	/// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
111	/// ```
112	///
113	/// Contiguous separators are separated by the empty string.
114	///
115	/// ```
116	/// use std::ffi::OsStr;
117	/// use clap_lex::OsStrExt as _;
118	/// let x = OsStr::new("(///)");
119	/// let d: Vec<_> = x.split("/").collect();
120	///
121	/// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
122	/// ```
123	///
124	/// Separators at the start or end of a string are neighbored
125	/// by empty strings.
126	///
127	/// ```
128	/// use std::ffi::OsStr;
129	/// use clap_lex::OsStrExt as _;
130	/// let d: Vec<_> = OsStr::new("010").split("0").collect();
131	/// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
132	/// ```
133	///
134	/// When the empty string is used as a separator, it panics
135	///
136	/// ```should_panic
137	/// use std::ffi::OsStr;
138	/// use clap_lex::OsStrExt as _;
139	/// let f: Vec<_> = OsStr::new("rust").split("").collect();
140	/// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
141	/// ```
142	///
143	/// Contiguous separators can lead to possibly surprising behavior
144	/// when whitespace is used as the separator. This code is correct:
145	///
146	/// ```
147	/// use std::ffi::OsStr;
148	/// use clap_lex::OsStrExt as _;
149	/// let x = OsStr::new(" a b c");
150	/// let d: Vec<_> = x.split(" ").collect();
151	///
152	/// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
153	/// ```
154	///
155	/// It does _not_ give you:
156	///
157	/// ```,ignore
158	/// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
159	/// ```
160	///
161	/// Use [`split_whitespace`] for this behavior.
162	///
163	/// [`split_whitespace`]: str::split_whitespace
164	fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
165	/// Splits the string on the first occurrence of the specified delimiter and
166	/// returns prefix before delimiter and suffix after delimiter.
167	///
168	/// # Examples
169	///
170	/// ```
171	/// use std::ffi::OsStr;
172	/// use clap_lex::OsStrExt as _;
173	/// assert_eq!(OsStr::new("cfg").split_once("="), None);
174	/// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
175	/// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
176	/// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
177	/// ```
178	fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
179	}
180
181	impl OsStrExt for OsStr {
182	fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
183	let bytes = to_bytes(self);
184	std::str::from_utf8(bytes)
185	}
186
187	fn contains(&self, needle: &str) -> bool {
188	self.find(needle).is_some()
189	}
190
191	fn find(&self, needle: &str) -> Option<usize> {
192	let bytes = to_bytes(self);
193	(`0`..=self.len().checked_sub(needle.len())?)
194	.find(\|&x\| bytes[x..].starts_with(needle.as_bytes()))
195	}
196
197	fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
198	let bytes = to_bytes(self);
199	bytes.strip_prefix(prefix.as_bytes()).map(\|s\| {
200	// SAFETY:
201	// - This came from `to_bytes`
202	// - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
203	unsafe { to_os_str_unchecked(s) }
204	})
205	}
206	fn starts_with(&self, prefix: &str) -> bool {
207	let bytes = to_bytes(self);
208	bytes.starts_with(prefix.as_bytes())
209	}
210
211	fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
212	assert_ne!(needle, "");
213	Split {
214	haystack: Some(self),
215	needle,
216	}
217	}
218
219	fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
220	let start = self.find(needle)?;
221	let end = start + needle.len();
222	let haystack = to_bytes(self);
223	let first = &haystack[`0`..start];
224	let second = &haystack[end..];
225	// SAFETY:
226	// - This came from `to_bytes`
227	// - Since `needle` is `&str`, any split will be along UTF-8 boundarie
228	unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
229	}
230	}
231
/// Holds the sealing trait: because this module is private, code outside the crate cannot name
/// `Sealed` and therefore cannot implement traits that require it.
mod private {
    /// Marker supertrait used to seal extension traits.
    pub trait Sealed {}

    impl Sealed for std::ffi::OsStr {}
}
237
/// Allow access to raw bytes
///
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since it's dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    unsafe { std::mem::transmute(s) }
}
258
/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - `&[u8]` must either be a `&str` or originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    std::mem::transmute(s)
}
276
277	pub struct Split<'s, 'n> {
278	haystack: Option<&'s OsStr>,
279	needle: &'n str,
280	}
281
282	impl<'s, 'n> Iterator for Split<'s, 'n> {
283	type Item = &'s OsStr;
284
285	fn next(&mut self) -> Option<Self::Item> {
286	let haystack = self.haystack?;
287	match haystack.split_once(self.needle) {
288	Some((first, second)) => {
289	if !haystack.is_empty() {
290	debug_assert_ne!(haystack, second);
291	}
292	self.haystack = Some(second);
293	Some(first)
294	}
295	None => {
296	self.haystack = None;
297	Some(haystack)
298	}
299	}
300	}
301	}
302
303	/// Split an `OsStr`
304	///
305	/// # Safety
306	///
307	/// `index` must be at a valid UTF-8 boundary
308	pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
309	let bytes = to_bytes(os);
310	let (first, second) = bytes.split_at(index);
311	(to_os_str_unchecked(first), to_os_str_unchecked(second))
312	}
313

Provided by KDAB

Definitions