//! The underlying OsString/OsStr implementation on Unix and many other
//! systems: just a `Vec<u8>`/`[u8]`.
3 | |
use crate::borrow::Cow;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::fmt::Write;
use crate::mem;
use crate::rc::Rc;
use crate::str;
use crate::sync::Arc;
use crate::sys_common::{AsInner, IntoInner};

use core::str::Utf8Chunks;
15 | |
// Out-of-line `tests` module, compiled only when building tests.
#[cfg(test)]
mod tests;
18 | |
/// Owned, growable OS string storage: a thin wrapper around a byte vector.
///
/// `#[repr(transparent)]` gives it the same layout as the wrapped `Vec<u8>`,
/// and the derived `Hash` hashes exactly that one field.
#[derive(Hash)]
#[repr(transparent)]
pub struct Buf {
    /// The stored bytes; the field itself carries no validation.
    pub inner: Vec<u8>,
}
24 | |
/// Borrowed OS string data: an unsized wrapper around `[u8]`.
///
/// `#[repr(transparent)]` gives it the same layout as `[u8]`, which is what the
/// reference, `Box`, `Rc` and `Arc` reinterpretations elsewhere in this file
/// rely on.
#[repr(transparent)]
pub struct Slice {
    /// The wrapped bytes; the field itself carries no validation.
    pub inner: [u8],
}
29 | |
30 | impl fmt::Debug for Slice { |
31 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
32 | fmt::Debug::fmt(&Utf8Chunks::new(&self.inner).debug(), f) |
33 | } |
34 | } |
35 | |
36 | impl fmt::Display for Slice { |
37 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
38 | // If we're the empty string then our iterator won't actually yield |
39 | // anything, so perform the formatting manually |
40 | if self.inner.is_empty() { |
41 | return "" .fmt(f); |
42 | } |
43 | |
44 | for chunk: Utf8Chunk<'_> in Utf8Chunks::new(&self.inner) { |
45 | let valid: &str = chunk.valid(); |
46 | // If we successfully decoded the whole chunk as a valid string then |
47 | // we can return a direct formatting of the string which will also |
48 | // respect various formatting flags if possible. |
49 | if chunk.invalid().is_empty() { |
50 | return valid.fmt(f); |
51 | } |
52 | |
53 | f.write_str(data:valid)?; |
54 | f.write_char(char::REPLACEMENT_CHARACTER)?; |
55 | } |
56 | Ok(()) |
57 | } |
58 | } |
59 | |
60 | impl fmt::Debug for Buf { |
61 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { |
62 | fmt::Debug::fmt(self.as_slice(), f:formatter) |
63 | } |
64 | } |
65 | |
66 | impl fmt::Display for Buf { |
67 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { |
68 | fmt::Display::fmt(self.as_slice(), f:formatter) |
69 | } |
70 | } |
71 | |
72 | impl Clone for Buf { |
73 | #[inline ] |
74 | fn clone(&self) -> Self { |
75 | Buf { inner: self.inner.clone() } |
76 | } |
77 | |
78 | #[inline ] |
79 | fn clone_from(&mut self, source: &Self) { |
80 | self.inner.clone_from(&source.inner) |
81 | } |
82 | } |
83 | |
84 | impl IntoInner<Vec<u8>> for Buf { |
85 | fn into_inner(self) -> Vec<u8> { |
86 | self.inner |
87 | } |
88 | } |
89 | |
90 | impl AsInner<[u8]> for Buf { |
91 | #[inline ] |
92 | fn as_inner(&self) -> &[u8] { |
93 | &self.inner |
94 | } |
95 | } |
96 | |
97 | impl Buf { |
98 | #[inline ] |
99 | pub fn into_encoded_bytes(self) -> Vec<u8> { |
100 | self.inner |
101 | } |
102 | |
103 | #[inline ] |
104 | pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self { |
105 | Self { inner: s } |
106 | } |
107 | |
108 | pub fn from_string(s: String) -> Buf { |
109 | Buf { inner: s.into_bytes() } |
110 | } |
111 | |
112 | #[inline ] |
113 | pub fn with_capacity(capacity: usize) -> Buf { |
114 | Buf { inner: Vec::with_capacity(capacity) } |
115 | } |
116 | |
117 | #[inline ] |
118 | pub fn clear(&mut self) { |
119 | self.inner.clear() |
120 | } |
121 | |
122 | #[inline ] |
123 | pub fn capacity(&self) -> usize { |
124 | self.inner.capacity() |
125 | } |
126 | |
127 | #[inline ] |
128 | pub fn reserve(&mut self, additional: usize) { |
129 | self.inner.reserve(additional) |
130 | } |
131 | |
132 | #[inline ] |
133 | pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { |
134 | self.inner.try_reserve(additional) |
135 | } |
136 | |
137 | #[inline ] |
138 | pub fn reserve_exact(&mut self, additional: usize) { |
139 | self.inner.reserve_exact(additional) |
140 | } |
141 | |
142 | #[inline ] |
143 | pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { |
144 | self.inner.try_reserve_exact(additional) |
145 | } |
146 | |
147 | #[inline ] |
148 | pub fn shrink_to_fit(&mut self) { |
149 | self.inner.shrink_to_fit() |
150 | } |
151 | |
152 | #[inline ] |
153 | pub fn shrink_to(&mut self, min_capacity: usize) { |
154 | self.inner.shrink_to(min_capacity) |
155 | } |
156 | |
157 | #[inline ] |
158 | pub fn as_slice(&self) -> &Slice { |
159 | // SAFETY: Slice just wraps [u8], |
160 | // and &*self.inner is &[u8], therefore |
161 | // transmuting &[u8] to &Slice is safe. |
162 | unsafe { mem::transmute(&*self.inner) } |
163 | } |
164 | |
165 | #[inline ] |
166 | pub fn as_mut_slice(&mut self) -> &mut Slice { |
167 | // SAFETY: Slice just wraps [u8], |
168 | // and &mut *self.inner is &mut [u8], therefore |
169 | // transmuting &mut [u8] to &mut Slice is safe. |
170 | unsafe { mem::transmute(&mut *self.inner) } |
171 | } |
172 | |
173 | pub fn into_string(self) -> Result<String, Buf> { |
174 | String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) |
175 | } |
176 | |
177 | pub fn push_slice(&mut self, s: &Slice) { |
178 | self.inner.extend_from_slice(&s.inner) |
179 | } |
180 | |
181 | #[inline ] |
182 | pub fn into_box(self) -> Box<Slice> { |
183 | unsafe { mem::transmute(self.inner.into_boxed_slice()) } |
184 | } |
185 | |
186 | #[inline ] |
187 | pub fn from_box(boxed: Box<Slice>) -> Buf { |
188 | let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; |
189 | Buf { inner: inner.into_vec() } |
190 | } |
191 | |
192 | #[inline ] |
193 | pub fn into_arc(&self) -> Arc<Slice> { |
194 | self.as_slice().into_arc() |
195 | } |
196 | |
197 | #[inline ] |
198 | pub fn into_rc(&self) -> Rc<Slice> { |
199 | self.as_slice().into_rc() |
200 | } |
201 | } |
202 | |
203 | impl Slice { |
204 | #[inline ] |
205 | pub fn as_encoded_bytes(&self) -> &[u8] { |
206 | &self.inner |
207 | } |
208 | |
209 | #[inline ] |
210 | pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { |
211 | unsafe { mem::transmute(s) } |
212 | } |
213 | |
214 | #[track_caller ] |
215 | #[inline ] |
216 | pub fn check_public_boundary(&self, index: usize) { |
217 | if index == 0 || index == self.inner.len() { |
218 | return; |
219 | } |
220 | if index < self.inner.len() |
221 | && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) |
222 | { |
223 | return; |
224 | } |
225 | |
226 | slow_path(&self.inner, index); |
227 | |
228 | /// We're betting that typical splits will involve an ASCII character. |
229 | /// |
230 | /// Putting the expensive checks in a separate function generates notably |
231 | /// better assembly. |
232 | #[track_caller ] |
233 | #[inline (never)] |
234 | fn slow_path(bytes: &[u8], index: usize) { |
235 | let (before, after) = bytes.split_at(index); |
236 | |
237 | // UTF-8 takes at most 4 bytes per codepoint, so we don't |
238 | // need to check more than that. |
239 | let after = after.get(..4).unwrap_or(after); |
240 | match str::from_utf8(after) { |
241 | Ok(_) => return, |
242 | Err(err) if err.valid_up_to() != 0 => return, |
243 | Err(_) => (), |
244 | } |
245 | |
246 | for len in 2..=4.min(index) { |
247 | let before = &before[index - len..]; |
248 | if str::from_utf8(before).is_ok() { |
249 | return; |
250 | } |
251 | } |
252 | |
253 | panic!("byte index {index} is not an OsStr boundary" ); |
254 | } |
255 | } |
256 | |
257 | #[inline ] |
258 | pub fn from_str(s: &str) -> &Slice { |
259 | unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } |
260 | } |
261 | |
262 | pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { |
263 | str::from_utf8(&self.inner) |
264 | } |
265 | |
266 | pub fn to_string_lossy(&self) -> Cow<'_, str> { |
267 | String::from_utf8_lossy(&self.inner) |
268 | } |
269 | |
270 | pub fn to_owned(&self) -> Buf { |
271 | Buf { inner: self.inner.to_vec() } |
272 | } |
273 | |
274 | pub fn clone_into(&self, buf: &mut Buf) { |
275 | self.inner.clone_into(&mut buf.inner) |
276 | } |
277 | |
278 | #[inline ] |
279 | pub fn into_box(&self) -> Box<Slice> { |
280 | let boxed: Box<[u8]> = self.inner.into(); |
281 | unsafe { mem::transmute(boxed) } |
282 | } |
283 | |
284 | pub fn empty_box() -> Box<Slice> { |
285 | let boxed: Box<[u8]> = Default::default(); |
286 | unsafe { mem::transmute(boxed) } |
287 | } |
288 | |
289 | #[inline ] |
290 | pub fn into_arc(&self) -> Arc<Slice> { |
291 | let arc: Arc<[u8]> = Arc::from(&self.inner); |
292 | unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } |
293 | } |
294 | |
295 | #[inline ] |
296 | pub fn into_rc(&self) -> Rc<Slice> { |
297 | let rc: Rc<[u8]> = Rc::from(&self.inner); |
298 | unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } |
299 | } |
300 | |
301 | #[inline ] |
302 | pub fn make_ascii_lowercase(&mut self) { |
303 | self.inner.make_ascii_lowercase() |
304 | } |
305 | |
306 | #[inline ] |
307 | pub fn make_ascii_uppercase(&mut self) { |
308 | self.inner.make_ascii_uppercase() |
309 | } |
310 | |
311 | #[inline ] |
312 | pub fn to_ascii_lowercase(&self) -> Buf { |
313 | Buf { inner: self.inner.to_ascii_lowercase() } |
314 | } |
315 | |
316 | #[inline ] |
317 | pub fn to_ascii_uppercase(&self) -> Buf { |
318 | Buf { inner: self.inner.to_ascii_uppercase() } |
319 | } |
320 | |
321 | #[inline ] |
322 | pub fn is_ascii(&self) -> bool { |
323 | self.inner.is_ascii() |
324 | } |
325 | |
326 | #[inline ] |
327 | pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { |
328 | self.inner.eq_ignore_ascii_case(&other.inner) |
329 | } |
330 | } |
331 | |