//! The underlying OsString/OsStr implementation on Unix and many other
//! systems: just a `Vec<u8>`/`[u8]`.
3 | |
4 | use core::clone::CloneToUninit; |
5 | |
6 | use crate::borrow::Cow; |
7 | use crate::collections::TryReserveError; |
8 | use crate::fmt::Write; |
9 | use crate::rc::Rc; |
10 | use crate::sync::Arc; |
11 | use crate::sys_common::{AsInner, FromInner, IntoInner}; |
12 | use crate::{fmt, mem, str}; |
13 | |
14 | #[cfg(test)] |
15 | mod tests; |
16 | |
/// Owned OS string buffer for platforms where an OS string is just bytes.
///
/// The type is a `#[repr(transparent)]` wrapper around a `Vec<u8>`; hashing a
/// `Buf` hashes the wrapped vector.
#[derive(Hash)]
#[repr(transparent)]
pub struct Buf {
    /// The raw bytes of the string.
    pub inner: Vec<u8>,
}
22 | |
/// Borrowed OS string slice for platforms where an OS string is just bytes.
///
/// The type is an unsized `#[repr(transparent)]` wrapper around `[u8]`, so a
/// reference to a `Slice` has the same layout as a reference to `[u8]`.
#[repr(transparent)]
pub struct Slice {
    /// The raw bytes of the string.
    pub inner: [u8],
}
27 | |
28 | impl IntoInner<Vec<u8>> for Buf { |
29 | fn into_inner(self) -> Vec<u8> { |
30 | self.inner |
31 | } |
32 | } |
33 | |
34 | impl FromInner<Vec<u8>> for Buf { |
35 | fn from_inner(inner: Vec<u8>) -> Self { |
36 | Buf { inner } |
37 | } |
38 | } |
39 | |
40 | impl AsInner<[u8]> for Buf { |
41 | #[inline] |
42 | fn as_inner(&self) -> &[u8] { |
43 | &self.inner |
44 | } |
45 | } |
46 | |
47 | impl fmt::Debug for Buf { |
48 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
49 | fmt::Debug::fmt(self.as_slice(), f) |
50 | } |
51 | } |
52 | |
53 | impl fmt::Display for Buf { |
54 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
55 | fmt::Display::fmt(self.as_slice(), f) |
56 | } |
57 | } |
58 | |
59 | impl fmt::Debug for Slice { |
60 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
61 | fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f) |
62 | } |
63 | } |
64 | |
65 | impl fmt::Display for Slice { |
66 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
67 | // If we're the empty string then our iterator won't actually yield |
68 | // anything, so perform the formatting manually |
69 | if self.inner.is_empty() { |
70 | return "".fmt(f); |
71 | } |
72 | |
73 | for chunk: Utf8Chunk<'_> in self.inner.utf8_chunks() { |
74 | let valid: &str = chunk.valid(); |
75 | // If we successfully decoded the whole chunk as a valid string then |
76 | // we can return a direct formatting of the string which will also |
77 | // respect various formatting flags if possible. |
78 | if chunk.invalid().is_empty() { |
79 | return valid.fmt(f); |
80 | } |
81 | |
82 | f.write_str(data:valid)?; |
83 | f.write_char(char::REPLACEMENT_CHARACTER)?; |
84 | } |
85 | Ok(()) |
86 | } |
87 | } |
88 | |
89 | impl Clone for Buf { |
90 | #[inline] |
91 | fn clone(&self) -> Self { |
92 | Buf { inner: self.inner.clone() } |
93 | } |
94 | |
95 | #[inline] |
96 | fn clone_from(&mut self, source: &Self) { |
97 | self.inner.clone_from(&source.inner) |
98 | } |
99 | } |
100 | |
101 | impl Buf { |
102 | #[inline] |
103 | pub fn into_encoded_bytes(self) -> Vec<u8> { |
104 | self.inner |
105 | } |
106 | |
107 | #[inline] |
108 | pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self { |
109 | Self { inner: s } |
110 | } |
111 | |
112 | #[inline] |
113 | pub fn into_string(self) -> Result<String, Buf> { |
114 | String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) |
115 | } |
116 | |
117 | #[inline] |
118 | pub const fn from_string(s: String) -> Buf { |
119 | Buf { inner: s.into_bytes() } |
120 | } |
121 | |
122 | #[inline] |
123 | pub fn with_capacity(capacity: usize) -> Buf { |
124 | Buf { inner: Vec::with_capacity(capacity) } |
125 | } |
126 | |
127 | #[inline] |
128 | pub fn clear(&mut self) { |
129 | self.inner.clear() |
130 | } |
131 | |
132 | #[inline] |
133 | pub fn capacity(&self) -> usize { |
134 | self.inner.capacity() |
135 | } |
136 | |
137 | #[inline] |
138 | pub fn push_slice(&mut self, s: &Slice) { |
139 | self.inner.extend_from_slice(&s.inner) |
140 | } |
141 | |
142 | #[inline] |
143 | pub fn push_str(&mut self, s: &str) { |
144 | self.inner.extend_from_slice(s.as_bytes()); |
145 | } |
146 | |
147 | #[inline] |
148 | pub fn reserve(&mut self, additional: usize) { |
149 | self.inner.reserve(additional) |
150 | } |
151 | |
152 | #[inline] |
153 | pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { |
154 | self.inner.try_reserve(additional) |
155 | } |
156 | |
157 | #[inline] |
158 | pub fn reserve_exact(&mut self, additional: usize) { |
159 | self.inner.reserve_exact(additional) |
160 | } |
161 | |
162 | #[inline] |
163 | pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { |
164 | self.inner.try_reserve_exact(additional) |
165 | } |
166 | |
167 | #[inline] |
168 | pub fn shrink_to_fit(&mut self) { |
169 | self.inner.shrink_to_fit() |
170 | } |
171 | |
172 | #[inline] |
173 | pub fn shrink_to(&mut self, min_capacity: usize) { |
174 | self.inner.shrink_to(min_capacity) |
175 | } |
176 | |
177 | #[inline] |
178 | pub fn as_slice(&self) -> &Slice { |
179 | // SAFETY: Slice just wraps [u8], |
180 | // and &*self.inner is &[u8], therefore |
181 | // transmuting &[u8] to &Slice is safe. |
182 | unsafe { mem::transmute(self.inner.as_slice()) } |
183 | } |
184 | |
185 | #[inline] |
186 | pub fn as_mut_slice(&mut self) -> &mut Slice { |
187 | // SAFETY: Slice just wraps [u8], |
188 | // and &mut *self.inner is &mut [u8], therefore |
189 | // transmuting &mut [u8] to &mut Slice is safe. |
190 | unsafe { mem::transmute(self.inner.as_mut_slice()) } |
191 | } |
192 | |
193 | #[inline] |
194 | pub fn leak<'a>(self) -> &'a mut Slice { |
195 | unsafe { mem::transmute(self.inner.leak()) } |
196 | } |
197 | |
198 | #[inline] |
199 | pub fn into_box(self) -> Box<Slice> { |
200 | unsafe { mem::transmute(self.inner.into_boxed_slice()) } |
201 | } |
202 | |
203 | #[inline] |
204 | pub fn from_box(boxed: Box<Slice>) -> Buf { |
205 | let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; |
206 | Buf { inner: inner.into_vec() } |
207 | } |
208 | |
209 | #[inline] |
210 | pub fn into_arc(&self) -> Arc<Slice> { |
211 | self.as_slice().into_arc() |
212 | } |
213 | |
214 | #[inline] |
215 | pub fn into_rc(&self) -> Rc<Slice> { |
216 | self.as_slice().into_rc() |
217 | } |
218 | |
219 | /// Provides plumbing to `Vec::truncate` without giving full mutable access |
220 | /// to the `Vec`. |
221 | /// |
222 | /// # Safety |
223 | /// |
224 | /// The length must be at an `OsStr` boundary, according to |
225 | /// `Slice::check_public_boundary`. |
226 | #[inline] |
227 | pub unsafe fn truncate_unchecked(&mut self, len: usize) { |
228 | self.inner.truncate(len); |
229 | } |
230 | |
231 | /// Provides plumbing to `Vec::extend_from_slice` without giving full |
232 | /// mutable access to the `Vec`. |
233 | /// |
234 | /// # Safety |
235 | /// |
236 | /// This encoding has no safety requirements. |
237 | #[inline] |
238 | pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { |
239 | self.inner.extend_from_slice(other); |
240 | } |
241 | } |
242 | |
243 | impl Slice { |
244 | #[inline] |
245 | pub fn as_encoded_bytes(&self) -> &[u8] { |
246 | &self.inner |
247 | } |
248 | |
249 | #[inline] |
250 | pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { |
251 | unsafe { mem::transmute(s) } |
252 | } |
253 | |
254 | #[track_caller] |
255 | #[inline] |
256 | pub fn check_public_boundary(&self, index: usize) { |
257 | if index == 0 || index == self.inner.len() { |
258 | return; |
259 | } |
260 | if index < self.inner.len() |
261 | && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) |
262 | { |
263 | return; |
264 | } |
265 | |
266 | slow_path(&self.inner, index); |
267 | |
268 | /// We're betting that typical splits will involve an ASCII character. |
269 | /// |
270 | /// Putting the expensive checks in a separate function generates notably |
271 | /// better assembly. |
272 | #[track_caller] |
273 | #[inline(never)] |
274 | fn slow_path(bytes: &[u8], index: usize) { |
275 | let (before, after) = bytes.split_at(index); |
276 | |
277 | // UTF-8 takes at most 4 bytes per codepoint, so we don't |
278 | // need to check more than that. |
279 | let after = after.get(..4).unwrap_or(after); |
280 | match str::from_utf8(after) { |
281 | Ok(_) => return, |
282 | Err(err) if err.valid_up_to() != 0 => return, |
283 | Err(_) => (), |
284 | } |
285 | |
286 | for len in 2..=4.min(index) { |
287 | let before = &before[index - len..]; |
288 | if str::from_utf8(before).is_ok() { |
289 | return; |
290 | } |
291 | } |
292 | |
293 | panic!("byte index{index} is not an OsStr boundary"); |
294 | } |
295 | } |
296 | |
297 | #[inline] |
298 | pub fn from_str(s: &str) -> &Slice { |
299 | unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } |
300 | } |
301 | |
302 | #[inline] |
303 | pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { |
304 | str::from_utf8(&self.inner) |
305 | } |
306 | |
307 | #[inline] |
308 | pub fn to_string_lossy(&self) -> Cow<'_, str> { |
309 | String::from_utf8_lossy(&self.inner) |
310 | } |
311 | |
312 | #[inline] |
313 | pub fn to_owned(&self) -> Buf { |
314 | Buf { inner: self.inner.to_vec() } |
315 | } |
316 | |
317 | #[inline] |
318 | pub fn clone_into(&self, buf: &mut Buf) { |
319 | self.inner.clone_into(&mut buf.inner) |
320 | } |
321 | |
322 | #[inline] |
323 | pub fn into_box(&self) -> Box<Slice> { |
324 | let boxed: Box<[u8]> = self.inner.into(); |
325 | unsafe { mem::transmute(boxed) } |
326 | } |
327 | |
328 | #[inline] |
329 | pub fn empty_box() -> Box<Slice> { |
330 | let boxed: Box<[u8]> = Default::default(); |
331 | unsafe { mem::transmute(boxed) } |
332 | } |
333 | |
334 | #[inline] |
335 | pub fn into_arc(&self) -> Arc<Slice> { |
336 | let arc: Arc<[u8]> = Arc::from(&self.inner); |
337 | unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } |
338 | } |
339 | |
340 | #[inline] |
341 | pub fn into_rc(&self) -> Rc<Slice> { |
342 | let rc: Rc<[u8]> = Rc::from(&self.inner); |
343 | unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } |
344 | } |
345 | |
346 | #[inline] |
347 | pub fn make_ascii_lowercase(&mut self) { |
348 | self.inner.make_ascii_lowercase() |
349 | } |
350 | |
351 | #[inline] |
352 | pub fn make_ascii_uppercase(&mut self) { |
353 | self.inner.make_ascii_uppercase() |
354 | } |
355 | |
356 | #[inline] |
357 | pub fn to_ascii_lowercase(&self) -> Buf { |
358 | Buf { inner: self.inner.to_ascii_lowercase() } |
359 | } |
360 | |
361 | #[inline] |
362 | pub fn to_ascii_uppercase(&self) -> Buf { |
363 | Buf { inner: self.inner.to_ascii_uppercase() } |
364 | } |
365 | |
366 | #[inline] |
367 | pub fn is_ascii(&self) -> bool { |
368 | self.inner.is_ascii() |
369 | } |
370 | |
371 | #[inline] |
372 | pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { |
373 | self.inner.eq_ignore_ascii_case(&other.inner) |
374 | } |
375 | } |
376 | |
377 | #[unstable(feature = "clone_to_uninit", issue = "126799")] |
378 | unsafe impl CloneToUninit for Slice { |
379 | #[inline] |
380 | #[cfg_attr(debug_assertions, track_caller)] |
381 | unsafe fn clone_to_uninit(&self, dst: *mut u8) { |
382 | // SAFETY: we're just a transparent wrapper around [u8] |
383 | unsafe { self.inner.clone_to_uninit(dest:dst) } |
384 | } |
385 | } |
386 |
Definitions
- Buf
- inner
- Slice
- inner
- into_inner
- from_inner
- as_inner
- fmt
- fmt
- fmt
- fmt
- clone
- clone_from
- into_encoded_bytes
- from_encoded_bytes_unchecked
- into_string
- from_string
- with_capacity
- clear
- capacity
- push_slice
- push_str
- reserve
- try_reserve
- reserve_exact
- try_reserve_exact
- shrink_to_fit
- shrink_to
- as_slice
- as_mut_slice
- leak
- into_box
- from_box
- into_arc
- into_rc
- truncate_unchecked
- extend_from_slice_unchecked
- as_encoded_bytes
- from_encoded_bytes_unchecked
- check_public_boundary
- slow_path
- from_str
- to_str
- to_string_lossy
- to_owned
- clone_into
- into_box
- empty_box
- into_arc
- into_rc
- make_ascii_lowercase
- make_ascii_uppercase
- to_ascii_lowercase
- to_ascii_uppercase
- is_ascii
- eq_ignore_ascii_case
Learn Rust with the experts
Find out more