1 | //! [`CStr`] and its related types. |
---|---|
2 | |
3 | use crate::cmp::Ordering; |
4 | use crate::error::Error; |
5 | use crate::ffi::c_char; |
6 | use crate::intrinsics::const_eval_select; |
7 | use crate::iter::FusedIterator; |
8 | use crate::marker::PhantomData; |
9 | use crate::ptr::NonNull; |
10 | use crate::slice::memchr; |
11 | use crate::{fmt, ops, slice, str}; |
12 | |
13 | // FIXME: because this is doc(inline)d, we *have* to use intra-doc links because the actual link |
14 | // depends on where the item is being documented. however, since this is libcore, we can't |
15 | // actually reference libstd or liballoc in intra-doc links. so, the best we can do is remove the |
16 | // links to `CString` and `String` for now until a solution is developed |
17 | |
18 | /// Representation of a borrowed C string. |
19 | /// |
20 | /// This type represents a borrowed reference to a nul-terminated |
21 | /// array of bytes. It can be constructed safely from a <code>&[[u8]]</code> |
22 | /// slice, or unsafely from a raw `*const c_char`. It can be expressed as a |
23 | /// literal in the form `c"Hello world"`. |
24 | /// |
25 | /// The `CStr` can then be converted to a Rust <code>&[str]</code> by performing |
26 | /// UTF-8 validation, or into an owned `CString`. |
27 | /// |
28 | /// `&CStr` is to `CString` as <code>&[str]</code> is to `String`: the former |
29 | /// in each pair are borrowed references; the latter are owned |
30 | /// strings. |
31 | /// |
32 | /// Note that this structure does **not** have a guaranteed layout (the `repr(transparent)` |
33 | /// notwithstanding) and should not be placed in the signatures of FFI functions. |
34 | /// Instead, safe wrappers of FFI functions may leverage [`CStr::as_ptr`] and the unsafe |
35 | /// [`CStr::from_ptr`] constructor to provide a safe interface to other consumers. |
36 | /// |
37 | /// # Examples |
38 | /// |
39 | /// Inspecting a foreign C string: |
40 | /// |
41 | /// ``` |
42 | /// use std::ffi::CStr; |
43 | /// use std::os::raw::c_char; |
44 | /// |
45 | /// # /* Extern functions are awkward in doc comments - fake it instead |
46 | /// extern "C" { fn my_string() -> *const c_char; } |
47 | /// # */ unsafe extern "C"fn my_string() -> *const c_char { c"hello".as_ptr() } |
48 | /// |
49 | /// unsafe { |
50 | /// let slice = CStr::from_ptr(my_string()); |
51 | /// println!("string buffer size without nul terminator: {}", slice.to_bytes().len()); |
52 | /// } |
53 | /// ``` |
54 | /// |
55 | /// Passing a Rust-originating C string: |
56 | /// |
57 | /// ``` |
58 | /// use std::ffi::CStr; |
59 | /// use std::os::raw::c_char; |
60 | /// |
61 | /// fn work(data: &CStr) { |
62 | /// unsafe extern "C"fn work_with(s: *const c_char) {} |
63 | /// unsafe { work_with(data.as_ptr()) } |
64 | /// } |
65 | /// |
66 | /// let s = c"Hello world!"; |
67 | /// work(&s); |
68 | /// ``` |
69 | /// |
70 | /// Converting a foreign C string into a Rust `String`: |
71 | /// |
72 | /// ``` |
73 | /// use std::ffi::CStr; |
74 | /// use std::os::raw::c_char; |
75 | /// |
76 | /// # /* Extern functions are awkward in doc comments - fake it instead |
77 | /// extern "C" { fn my_string() -> *const c_char; } |
78 | /// # */ unsafe extern "C"fn my_string() -> *const c_char { c"hello".as_ptr() } |
79 | /// |
80 | /// fn my_string_safe() -> String { |
81 | /// let cstr = unsafe { CStr::from_ptr(my_string()) }; |
82 | /// // Get a copy-on-write Cow<'_, str>, then extract the |
83 | /// // allocated String (or allocate a fresh one if needed). |
84 | /// cstr.to_string_lossy().into_owned() |
85 | /// } |
86 | /// |
87 | /// println!("string: {}", my_string_safe()); |
88 | /// ``` |
89 | /// |
90 | /// [str]: prim@str "str" |
91 | #[derive(PartialEq, Eq, Hash)] |
92 | #[stable(feature = "core_c_str", since = "1.64.0")] |
93 | #[rustc_diagnostic_item= "cstr_type"] |
94 | #[rustc_has_incoherent_inherent_impls] |
95 | #[lang= "CStr"] |
96 | // `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies |
97 | // on `CStr` being layout-compatible with `[u8]`. |
98 | // However, `CStr` layout is considered an implementation detail and must not be relied upon. We |
99 | // want `repr(transparent)` but we don't want it to show up in rustdoc, so we hide it under |
100 | // `cfg(doc)`. This is an ad-hoc implementation of attribute privacy. |
101 | #[repr(transparent)] |
102 | pub struct CStr { |
103 | // FIXME: this should not be represented with a DST slice but rather with |
104 | // just a raw `c_char` along with some form of marker to make |
105 | // this an unsized type. Essentially `sizeof(&CStr)` should be the |
106 | // same as `sizeof(&c_char)` but `CStr` should be an unsized type. |
107 | inner: [c_char], |
108 | } |
109 | |
110 | /// An error indicating that a nul byte was not in the expected position. |
111 | /// |
112 | /// The slice used to create a [`CStr`] must have one and only one nul byte, |
113 | /// positioned at the end. |
114 | /// |
115 | /// This error is created by the [`CStr::from_bytes_with_nul`] method. |
116 | /// See its documentation for more. |
117 | /// |
118 | /// # Examples |
119 | /// |
120 | /// ``` |
121 | /// use std::ffi::{CStr, FromBytesWithNulError}; |
122 | /// |
123 | /// let _: FromBytesWithNulError = CStr::from_bytes_with_nul(b"f\0 oo").unwrap_err(); |
124 | /// ``` |
125 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] |
126 | #[stable(feature = "core_c_str", since = "1.64.0")] |
127 | pub enum FromBytesWithNulError { |
128 | /// Data provided contains an interior nul byte at byte `position`. |
129 | InteriorNul { |
130 | /// The position of the interior nul byte. |
131 | position: usize, |
132 | }, |
133 | /// Data provided is not nul terminated. |
134 | NotNulTerminated, |
135 | } |
136 | |
137 | #[stable(feature = "frombyteswithnulerror_impls", since = "1.17.0")] |
138 | impl Error for FromBytesWithNulError { |
139 | #[allow(deprecated)] |
140 | fn description(&self) -> &str { |
141 | match self { |
142 | Self::InteriorNul { .. } => "data provided contains an interior nul byte", |
143 | Self::NotNulTerminated => "data provided is not nul terminated", |
144 | } |
145 | } |
146 | } |
147 | |
148 | /// An error indicating that no nul byte was present. |
149 | /// |
150 | /// A slice used to create a [`CStr`] must contain a nul byte somewhere |
151 | /// within the slice. |
152 | /// |
153 | /// This error is created by the [`CStr::from_bytes_until_nul`] method. |
154 | #[derive(Clone, PartialEq, Eq, Debug)] |
155 | #[stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")] |
156 | pub struct FromBytesUntilNulError(()); |
157 | |
158 | #[stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")] |
159 | impl fmt::Display for FromBytesUntilNulError { |
160 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
161 | write!(f, "data provided does not contain a nul") |
162 | } |
163 | } |
164 | |
165 | #[stable(feature = "cstr_debug", since = "1.3.0")] |
166 | impl fmt::Debug for CStr { |
167 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
168 | write!(f, "\"{}\" ", self.to_bytes().escape_ascii()) |
169 | } |
170 | } |
171 | |
172 | #[stable(feature = "cstr_default", since = "1.10.0")] |
173 | impl Default for &CStr { |
174 | #[inline] |
175 | fn default() -> Self { |
176 | const SLICE: &[c_char] = &[0]; |
177 | // SAFETY: `SLICE` is indeed pointing to a valid nul-terminated string. |
178 | unsafe { CStr::from_ptr(SLICE.as_ptr()) } |
179 | } |
180 | } |
181 | |
182 | #[stable(feature = "frombyteswithnulerror_impls", since = "1.17.0")] |
183 | impl fmt::Display for FromBytesWithNulError { |
184 | #[allow(deprecated, deprecated_in_future)] |
185 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
186 | f.write_str(self.description())?; |
187 | if let Self::InteriorNul { position: &usize } = self { |
188 | write!(f, " at byte pos{position} ")?; |
189 | } |
190 | Ok(()) |
191 | } |
192 | } |
193 | |
194 | impl CStr { |
195 | /// Wraps a raw C string with a safe C string wrapper. |
196 | /// |
197 | /// This function will wrap the provided `ptr` with a `CStr` wrapper, which |
198 | /// allows inspection and interoperation of non-owned C strings. The total |
199 | /// size of the terminated buffer must be smaller than [`isize::MAX`] **bytes** |
200 | /// in memory (a restriction from [`slice::from_raw_parts`]). |
201 | /// |
202 | /// # Safety |
203 | /// |
204 | /// * The memory pointed to by `ptr` must contain a valid nul terminator at the |
205 | /// end of the string. |
206 | /// |
207 | /// * `ptr` must be [valid] for reads of bytes up to and including the nul terminator. |
208 | /// This means in particular: |
209 | /// |
210 | /// * The entire memory range of this `CStr` must be contained within a single allocation! |
211 | /// * `ptr` must be non-null even for a zero-length cstr. |
212 | /// |
213 | /// * The memory referenced by the returned `CStr` must not be mutated for |
214 | /// the duration of lifetime `'a`. |
215 | /// |
216 | /// * The nul terminator must be within `isize::MAX` from `ptr` |
217 | /// |
218 | /// > **Note**: This operation is intended to be a 0-cost cast but it is |
219 | /// > currently implemented with an up-front calculation of the length of |
220 | /// > the string. This is not guaranteed to always be the case. |
221 | /// |
222 | /// # Caveat |
223 | /// |
224 | /// The lifetime for the returned slice is inferred from its usage. To prevent accidental misuse, |
225 | /// it's suggested to tie the lifetime to whichever source lifetime is safe in the context, |
226 | /// such as by providing a helper function taking the lifetime of a host value for the slice, |
227 | /// or by explicit annotation. |
228 | /// |
229 | /// # Examples |
230 | /// |
231 | /// ``` |
232 | /// use std::ffi::{c_char, CStr}; |
233 | /// |
234 | /// fn my_string() -> *const c_char { |
235 | /// c"hello".as_ptr() |
236 | /// } |
237 | /// |
238 | /// unsafe { |
239 | /// let slice = CStr::from_ptr(my_string()); |
240 | /// assert_eq!(slice.to_str().unwrap(), "hello"); |
241 | /// } |
242 | /// ``` |
243 | /// |
244 | /// ``` |
245 | /// use std::ffi::{c_char, CStr}; |
246 | /// |
247 | /// const HELLO_PTR: *const c_char = { |
248 | /// const BYTES: &[u8] = b"Hello, world!\0 "; |
249 | /// BYTES.as_ptr().cast() |
250 | /// }; |
251 | /// const HELLO: &CStr = unsafe { CStr::from_ptr(HELLO_PTR) }; |
252 | /// |
253 | /// assert_eq!(c"Hello, world!", HELLO); |
254 | /// ``` |
255 | /// |
256 | /// [valid]: core::ptr#safety |
257 | #[inline] // inline is necessary for codegen to see strlen. |
258 | #[must_use] |
259 | #[stable(feature = "rust1", since = "1.0.0")] |
260 | #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "1.81.0")] |
261 | pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { |
262 | // SAFETY: The caller has provided a pointer that points to a valid C |
263 | // string with a NUL terminator less than `isize::MAX` from `ptr`. |
264 | let len = unsafe { strlen(ptr) }; |
265 | |
266 | // SAFETY: The caller has provided a valid pointer with length less than |
267 | // `isize::MAX`, so `from_raw_parts` is safe. The content remains valid |
268 | // and doesn't change for the lifetime of the returned `CStr`. This |
269 | // means the call to `from_bytes_with_nul_unchecked` is correct. |
270 | // |
271 | // The cast from c_char to u8 is ok because a c_char is always one byte. |
272 | unsafe { Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1)) } |
273 | } |
274 | |
275 | /// Creates a C string wrapper from a byte slice with any number of nuls. |
276 | /// |
277 | /// This method will create a `CStr` from any byte slice that contains at |
278 | /// least one nul byte. Unlike with [`CStr::from_bytes_with_nul`], the caller |
279 | /// does not need to know where the nul byte is located. |
280 | /// |
281 | /// If the first byte is a nul character, this method will return an |
282 | /// empty `CStr`. If multiple nul characters are present, the `CStr` will |
283 | /// end at the first one. |
284 | /// |
285 | /// If the slice only has a single nul byte at the end, this method is |
286 | /// equivalent to [`CStr::from_bytes_with_nul`]. |
287 | /// |
288 | /// # Examples |
289 | /// ``` |
290 | /// use std::ffi::CStr; |
291 | /// |
292 | /// let mut buffer = [0u8; 16]; |
293 | /// unsafe { |
294 | /// // Here we might call an unsafe C function that writes a string |
295 | /// // into the buffer. |
296 | /// let buf_ptr = buffer.as_mut_ptr(); |
297 | /// buf_ptr.write_bytes(b'A', 8); |
298 | /// } |
299 | /// // Attempt to extract a C nul-terminated string from the buffer. |
300 | /// let c_str = CStr::from_bytes_until_nul(&buffer[..]).unwrap(); |
301 | /// assert_eq!(c_str.to_str().unwrap(), "AAAAAAAA"); |
302 | /// ``` |
303 | /// |
304 | #[stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")] |
305 | #[rustc_const_stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")] |
306 | pub const fn from_bytes_until_nul(bytes: &[u8]) -> Result<&CStr, FromBytesUntilNulError> { |
307 | let nul_pos = memchr::memchr(0, bytes); |
308 | match nul_pos { |
309 | Some(nul_pos) => { |
310 | // FIXME(const-hack) replace with range index |
311 | // SAFETY: nul_pos + 1 <= bytes.len() |
312 | let subslice = unsafe { crate::slice::from_raw_parts(bytes.as_ptr(), nul_pos + 1) }; |
313 | // SAFETY: We know there is a nul byte at nul_pos, so this slice |
314 | // (ending at the nul byte) is a well-formed C string. |
315 | Ok(unsafe { CStr::from_bytes_with_nul_unchecked(subslice) }) |
316 | } |
317 | None => Err(FromBytesUntilNulError(())), |
318 | } |
319 | } |
320 | |
321 | /// Creates a C string wrapper from a byte slice with exactly one nul |
322 | /// terminator. |
323 | /// |
324 | /// This function will cast the provided `bytes` to a `CStr` |
325 | /// wrapper after ensuring that the byte slice is nul-terminated |
326 | /// and does not contain any interior nul bytes. |
327 | /// |
328 | /// If the nul byte may not be at the end, |
329 | /// [`CStr::from_bytes_until_nul`] can be used instead. |
330 | /// |
331 | /// # Examples |
332 | /// |
333 | /// ``` |
334 | /// use std::ffi::CStr; |
335 | /// |
336 | /// let cstr = CStr::from_bytes_with_nul(b"hello\0 "); |
337 | /// assert_eq!(cstr, Ok(c"hello")); |
338 | /// ``` |
339 | /// |
340 | /// Creating a `CStr` without a trailing nul terminator is an error: |
341 | /// |
342 | /// ``` |
343 | /// use std::ffi::{CStr, FromBytesWithNulError}; |
344 | /// |
345 | /// let cstr = CStr::from_bytes_with_nul(b"hello"); |
346 | /// assert_eq!(cstr, Err(FromBytesWithNulError::NotNulTerminated)); |
347 | /// ``` |
348 | /// |
349 | /// Creating a `CStr` with an interior nul byte is an error: |
350 | /// |
351 | /// ``` |
352 | /// use std::ffi::{CStr, FromBytesWithNulError}; |
353 | /// |
354 | /// let cstr = CStr::from_bytes_with_nul(b"he\0 llo\0 "); |
355 | /// assert_eq!(cstr, Err(FromBytesWithNulError::InteriorNul { position: 2 })); |
356 | /// ``` |
357 | #[stable(feature = "cstr_from_bytes", since = "1.10.0")] |
358 | #[rustc_const_stable(feature = "const_cstr_methods", since = "1.72.0")] |
359 | pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, FromBytesWithNulError> { |
360 | let nul_pos = memchr::memchr(0, bytes); |
361 | match nul_pos { |
362 | Some(nul_pos) if nul_pos + 1 == bytes.len() => { |
363 | // SAFETY: We know there is only one nul byte, at the end |
364 | // of the byte slice. |
365 | Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) |
366 | } |
367 | Some(position) => Err(FromBytesWithNulError::InteriorNul { position }), |
368 | None => Err(FromBytesWithNulError::NotNulTerminated), |
369 | } |
370 | } |
371 | |
372 | /// Unsafely creates a C string wrapper from a byte slice. |
373 | /// |
374 | /// This function will cast the provided `bytes` to a `CStr` wrapper without |
375 | /// performing any sanity checks. |
376 | /// |
377 | /// # Safety |
378 | /// The provided slice **must** be nul-terminated and not contain any interior |
379 | /// nul bytes. |
380 | /// |
381 | /// # Examples |
382 | /// |
383 | /// ``` |
384 | /// use std::ffi::CStr; |
385 | /// |
386 | /// let bytes = b"Hello world!\0 "; |
387 | /// |
388 | /// let cstr = unsafe { CStr::from_bytes_with_nul_unchecked(bytes) }; |
389 | /// assert_eq!(cstr.to_bytes_with_nul(), bytes); |
390 | /// ``` |
391 | #[inline] |
392 | #[must_use] |
393 | #[stable(feature = "cstr_from_bytes", since = "1.10.0")] |
394 | #[rustc_const_stable(feature = "const_cstr_unchecked", since = "1.59.0")] |
395 | #[rustc_allow_const_fn_unstable(const_eval_select)] |
396 | pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { |
397 | const_eval_select!( |
398 | @capture { bytes: &[u8] } -> &CStr: |
399 | if const { |
400 | // Saturating so that an empty slice panics in the assert with a good |
401 | // message, not here due to underflow. |
402 | let mut i = bytes.len().saturating_sub(1); |
403 | assert!(!bytes.is_empty() && bytes[i] == 0, "input was not nul-terminated"); |
404 | |
405 | // Ending nul byte exists, skip to the rest. |
406 | while i != 0 { |
407 | i -= 1; |
408 | let byte = bytes[i]; |
409 | assert!(byte != 0, "input contained interior nul"); |
410 | } |
411 | |
412 | // SAFETY: See runtime cast comment below. |
413 | unsafe { &*(bytes as *const [u8] as *const CStr) } |
414 | } else { |
415 | // Chance at catching some UB at runtime with debug builds. |
416 | debug_assert!(!bytes.is_empty() && bytes[bytes.len() - 1] == 0); |
417 | |
418 | // SAFETY: Casting to CStr is safe because its internal representation |
419 | // is a [u8] too (safe only inside std). |
420 | // Dereferencing the obtained pointer is safe because it comes from a |
421 | // reference. Making a reference is then safe because its lifetime |
422 | // is bound by the lifetime of the given `bytes`. |
423 | unsafe { &*(bytes as *const [u8] as *const CStr) } |
424 | } |
425 | ) |
426 | } |
427 | |
428 | /// Returns the inner pointer to this C string. |
429 | /// |
430 | /// The returned pointer will be valid for as long as `self` is, and points |
431 | /// to a contiguous region of memory terminated with a 0 byte to represent |
432 | /// the end of the string. |
433 | /// |
434 | /// The type of the returned pointer is |
435 | /// [`*const c_char`][crate::ffi::c_char], and whether it's |
436 | /// an alias for `*const i8` or `*const u8` is platform-specific. |
437 | /// |
438 | /// **WARNING** |
439 | /// |
440 | /// The returned pointer is read-only; writing to it (including passing it |
441 | /// to C code that writes to it) causes undefined behavior. |
442 | /// |
443 | /// It is your responsibility to make sure that the underlying memory is not |
444 | /// freed too early. For example, the following code will cause undefined |
445 | /// behavior when `ptr` is used inside the `unsafe` block: |
446 | /// |
447 | /// ```no_run |
448 | /// # #![expect(dangling_pointers_from_temporaries)] |
449 | /// use std::ffi::{CStr, CString}; |
450 | /// |
451 | /// // 💀 The meaning of this entire program is undefined, |
452 | /// // 💀 and nothing about its behavior is guaranteed, |
453 | /// // 💀 not even that its behavior resembles the code as written, |
454 | /// // 💀 just because it contains a single instance of undefined behavior! |
455 | /// |
456 | /// // 🚨 creates a dangling pointer to a temporary `CString` |
457 | /// // 🚨 that is deallocated at the end of the statement |
458 | /// let ptr = CString::new("Hi!".to_uppercase()).unwrap().as_ptr(); |
459 | /// |
460 | /// // without undefined behavior, you would expect that `ptr` equals: |
461 | /// dbg!(CStr::from_bytes_with_nul(b"HI!\0 ").unwrap()); |
462 | /// |
463 | /// // 🙏 Possibly the program behaved as expected so far, |
464 | /// // 🙏 and this just shows `ptr` is now garbage..., but |
465 | /// // 💀 this violates `CStr::from_ptr`'s safety contract |
466 | /// // 💀 leading to a dereference of a dangling pointer, |
467 | /// // 💀 which is immediate undefined behavior. |
468 | /// // 💀 *BOOM*, you're dead, you're entire program has no meaning. |
469 | /// dbg!(unsafe { CStr::from_ptr(ptr) }); |
470 | /// ``` |
471 | /// |
472 | /// This happens because, the pointer returned by `as_ptr` does not carry any |
473 | /// lifetime information, and the `CString` is deallocated immediately after |
474 | /// the expression that it is part of has been evaluated. |
475 | /// To fix the problem, bind the `CString` to a local variable: |
476 | /// |
477 | /// ``` |
478 | /// use std::ffi::{CStr, CString}; |
479 | /// |
480 | /// let c_str = CString::new("Hi!".to_uppercase()).unwrap(); |
481 | /// let ptr = c_str.as_ptr(); |
482 | /// |
483 | /// assert_eq!(unsafe { CStr::from_ptr(ptr) }, c"HI!"); |
484 | /// ``` |
485 | #[inline] |
486 | #[must_use] |
487 | #[stable(feature = "rust1", since = "1.0.0")] |
488 | #[rustc_const_stable(feature = "const_str_as_ptr", since = "1.32.0")] |
489 | #[rustc_as_ptr] |
490 | #[rustc_never_returns_null_ptr] |
491 | pub const fn as_ptr(&self) -> *const c_char { |
492 | self.inner.as_ptr() |
493 | } |
494 | |
495 | /// We could eventually expose this publicly, if we wanted. |
496 | #[inline] |
497 | #[must_use] |
498 | const fn as_non_null_ptr(&self) -> NonNull<c_char> { |
499 | // FIXME(const_trait_impl) replace with `NonNull::from` |
500 | // SAFETY: a reference is never null |
501 | unsafe { NonNull::new_unchecked(&self.inner as *const [c_char] as *mut [c_char]) } |
502 | .as_non_null_ptr() |
503 | } |
504 | |
505 | /// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator. |
506 | /// |
507 | /// > **Note**: This method is currently implemented as a constant-time |
508 | /// > cast, but it is planned to alter its definition in the future to |
509 | /// > perform the length calculation whenever this method is called. |
510 | /// |
511 | /// # Examples |
512 | /// |
513 | /// ``` |
514 | /// assert_eq!(c"foo".count_bytes(), 3); |
515 | /// assert_eq!(c"".count_bytes(), 0); |
516 | /// ``` |
517 | #[inline] |
518 | #[must_use] |
519 | #[doc(alias( "len", "strlen"))] |
520 | #[stable(feature = "cstr_count_bytes", since = "1.79.0")] |
521 | #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "1.81.0")] |
522 | pub const fn count_bytes(&self) -> usize { |
523 | self.inner.len() - 1 |
524 | } |
525 | |
526 | /// Returns `true` if `self.to_bytes()` has a length of 0. |
527 | /// |
528 | /// # Examples |
529 | /// |
530 | /// ``` |
531 | /// assert!(!c"foo".is_empty()); |
532 | /// assert!(c"".is_empty()); |
533 | /// ``` |
534 | #[inline] |
535 | #[stable(feature = "cstr_is_empty", since = "1.71.0")] |
536 | #[rustc_const_stable(feature = "cstr_is_empty", since = "1.71.0")] |
537 | pub const fn is_empty(&self) -> bool { |
538 | // SAFETY: We know there is at least one byte; for empty strings it |
539 | // is the NUL terminator. |
540 | // FIXME(const-hack): use get_unchecked |
541 | unsafe { *self.inner.as_ptr() == 0 } |
542 | } |
543 | |
544 | /// Converts this C string to a byte slice. |
545 | /// |
546 | /// The returned slice will **not** contain the trailing nul terminator that this C |
547 | /// string has. |
548 | /// |
549 | /// > **Note**: This method is currently implemented as a constant-time |
550 | /// > cast, but it is planned to alter its definition in the future to |
551 | /// > perform the length calculation whenever this method is called. |
552 | /// |
553 | /// # Examples |
554 | /// |
555 | /// ``` |
556 | /// assert_eq!(c"foo".to_bytes(), b"foo"); |
557 | /// ``` |
558 | #[inline] |
559 | #[must_use= "this returns the result of the operation, \ |
560 | without modifying the original"] |
561 | #[stable(feature = "rust1", since = "1.0.0")] |
562 | #[rustc_const_stable(feature = "const_cstr_methods", since = "1.72.0")] |
563 | pub const fn to_bytes(&self) -> &[u8] { |
564 | let bytes = self.to_bytes_with_nul(); |
565 | // FIXME(const-hack) replace with range index |
566 | // SAFETY: to_bytes_with_nul returns slice with length at least 1 |
567 | unsafe { slice::from_raw_parts(bytes.as_ptr(), bytes.len() - 1) } |
568 | } |
569 | |
570 | /// Converts this C string to a byte slice containing the trailing 0 byte. |
571 | /// |
572 | /// This function is the equivalent of [`CStr::to_bytes`] except that it |
573 | /// will retain the trailing nul terminator instead of chopping it off. |
574 | /// |
575 | /// > **Note**: This method is currently implemented as a 0-cost cast, but |
576 | /// > it is planned to alter its definition in the future to perform the |
577 | /// > length calculation whenever this method is called. |
578 | /// |
579 | /// # Examples |
580 | /// |
581 | /// ``` |
582 | /// assert_eq!(c"foo".to_bytes_with_nul(), b"foo\0 "); |
583 | /// ``` |
584 | #[inline] |
585 | #[must_use= "this returns the result of the operation, \ |
586 | without modifying the original"] |
587 | #[stable(feature = "rust1", since = "1.0.0")] |
588 | #[rustc_const_stable(feature = "const_cstr_methods", since = "1.72.0")] |
589 | pub const fn to_bytes_with_nul(&self) -> &[u8] { |
590 | // SAFETY: Transmuting a slice of `c_char`s to a slice of `u8`s |
591 | // is safe on all supported targets. |
592 | unsafe { &*((&raw const self.inner) as *const [u8]) } |
593 | } |
594 | |
595 | /// Iterates over the bytes in this C string. |
596 | /// |
597 | /// The returned iterator will **not** contain the trailing nul terminator |
598 | /// that this C string has. |
599 | /// |
600 | /// # Examples |
601 | /// |
602 | /// ``` |
603 | /// #![feature(cstr_bytes)] |
604 | /// |
605 | /// assert!(c"foo".bytes().eq(* b"foo")); |
606 | /// ``` |
607 | #[inline] |
608 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
609 | pub fn bytes(&self) -> Bytes<'_> { |
610 | Bytes::new(self) |
611 | } |
612 | |
613 | /// Yields a <code>&[str]</code> slice if the `CStr` contains valid UTF-8. |
614 | /// |
615 | /// If the contents of the `CStr` are valid UTF-8 data, this |
616 | /// function will return the corresponding <code>&[str]</code> slice. Otherwise, |
617 | /// it will return an error with details of where UTF-8 validation failed. |
618 | /// |
619 | /// [str]: prim@str "str" |
620 | /// |
621 | /// # Examples |
622 | /// |
623 | /// ``` |
624 | /// assert_eq!(c"foo".to_str(), Ok( "foo")); |
625 | /// ``` |
626 | #[stable(feature = "cstr_to_str", since = "1.4.0")] |
627 | #[rustc_const_stable(feature = "const_cstr_methods", since = "1.72.0")] |
628 | pub const fn to_str(&self) -> Result<&str, str::Utf8Error> { |
629 | // N.B., when `CStr` is changed to perform the length check in `.to_bytes()` |
630 | // instead of in `from_ptr()`, it may be worth considering if this should |
631 | // be rewritten to do the UTF-8 check inline with the length calculation |
632 | // instead of doing it afterwards. |
633 | str::from_utf8(self.to_bytes()) |
634 | } |
635 | |
636 | /// Returns an object that implements [`Display`] for safely printing a [`CStr`] that may |
637 | /// contain non-Unicode data. |
638 | /// |
639 | /// Behaves as if `self` were first lossily converted to a `str`, with invalid UTF-8 presented |
640 | /// as the Unicode replacement character: �. |
641 | /// |
642 | /// [`Display`]: fmt::Display |
643 | /// |
644 | /// # Examples |
645 | /// |
646 | /// ``` |
647 | /// #![feature(cstr_display)] |
648 | /// |
649 | /// let cstr = c"Hello, world!"; |
650 | /// println!("{}", cstr.display()); |
651 | /// ``` |
652 | #[unstable(feature = "cstr_display", issue = "139984")] |
653 | #[must_use= "this does not display the `CStr`; \ |
654 | it returns an object that can be displayed"] |
655 | #[inline] |
656 | pub fn display(&self) -> impl fmt::Display { |
657 | crate::bstr::ByteStr::from_bytes(self.to_bytes()) |
658 | } |
659 | } |
660 | |
661 | // `.to_bytes()` representations are compared instead of the inner `[c_char]`s, |
662 | // because `c_char` is `i8` (not `u8`) on some platforms. |
663 | // That is why this is implemented manually and not derived. |
664 | #[stable(feature = "rust1", since = "1.0.0")] |
665 | impl PartialOrd for CStr { |
666 | #[inline] |
667 | fn partial_cmp(&self, other: &CStr) -> Option<Ordering> { |
668 | self.to_bytes().partial_cmp(&other.to_bytes()) |
669 | } |
670 | } |
671 | #[stable(feature = "rust1", since = "1.0.0")] |
672 | impl Ord for CStr { |
673 | #[inline] |
674 | fn cmp(&self, other: &CStr) -> Ordering { |
675 | self.to_bytes().cmp(&other.to_bytes()) |
676 | } |
677 | } |
678 | |
679 | #[stable(feature = "cstr_range_from", since = "1.47.0")] |
680 | impl ops::Index<ops::RangeFrom<usize>> for CStr { |
681 | type Output = CStr; |
682 | |
683 | #[inline] |
684 | fn index(&self, index: ops::RangeFrom<usize>) -> &CStr { |
685 | let bytes: &[u8] = self.to_bytes_with_nul(); |
686 | // we need to manually check the starting index to account for the null |
687 | // byte, since otherwise we could get an empty string that doesn't end |
688 | // in a null. |
689 | if index.start < bytes.len() { |
690 | // SAFETY: Non-empty tail of a valid `CStr` is still a valid `CStr`. |
691 | unsafe { CStr::from_bytes_with_nul_unchecked(&bytes[index.start..]) } |
692 | } else { |
693 | panic!( |
694 | "index out of bounds: the len is{} but the index is{} ", |
695 | bytes.len(), |
696 | index.start |
697 | ); |
698 | } |
699 | } |
700 | } |
701 | |
702 | #[stable(feature = "cstring_asref", since = "1.7.0")] |
703 | impl AsRef<CStr> for CStr { |
704 | #[inline] |
705 | fn as_ref(&self) -> &CStr { |
706 | self |
707 | } |
708 | } |
709 | |
710 | /// Calculate the length of a nul-terminated string. Defers to C's `strlen` when possible. |
711 | /// |
712 | /// # Safety |
713 | /// |
714 | /// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be |
715 | /// located within `isize::MAX` from `ptr`. |
716 | #[inline] |
717 | #[unstable(feature = "cstr_internals", issue = "none")] |
718 | #[rustc_allow_const_fn_unstable(const_eval_select)] |
719 | const unsafe fn strlen(ptr: *const c_char) -> usize { |
720 | const_eval_select!( |
721 | @capture { s: *const c_char = ptr } -> usize: |
722 | if const { |
723 | let mut len = 0; |
724 | |
725 | // SAFETY: Outer caller has provided a pointer to a valid C string. |
726 | while unsafe { *s.add(len) } != 0 { |
727 | len += 1; |
728 | } |
729 | |
730 | len |
731 | } else { |
732 | unsafe extern "C"{ |
733 | /// Provided by libc or compiler_builtins. |
734 | fn strlen(s: *const c_char) -> usize; |
735 | } |
736 | |
737 | // SAFETY: Outer caller has provided a pointer to a valid C string. |
738 | unsafe { strlen(s) } |
739 | } |
740 | ) |
741 | } |
742 | |
743 | /// An iterator over the bytes of a [`CStr`], without the nul terminator. |
744 | /// |
745 | /// This struct is created by the [`bytes`] method on [`CStr`]. |
746 | /// See its documentation for more. |
747 | /// |
748 | /// [`bytes`]: CStr::bytes |
749 | #[must_use= "iterators are lazy and do nothing unless consumed"] |
750 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
751 | #[derive(Clone, Debug)] |
752 | pub struct Bytes<'a> { |
753 | // since we know the string is nul-terminated, we only need one pointer |
754 | ptr: NonNull<u8>, |
755 | phantom: PhantomData<&'a [c_char]>, |
756 | } |
757 | |
758 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
759 | unsafe impl Send for Bytes<'_> {} |
760 | |
761 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
762 | unsafe impl Sync for Bytes<'_> {} |
763 | |
764 | impl<'a> Bytes<'a> { |
765 | #[inline] |
766 | fn new(s: &'a CStr) -> Self { |
767 | Self { ptr: s.as_non_null_ptr().cast(), phantom: PhantomData } |
768 | } |
769 | |
770 | #[inline] |
771 | fn is_empty(&self) -> bool { |
772 | // SAFETY: We uphold that the pointer is always valid to dereference |
773 | // by starting with a valid C string and then never incrementing beyond |
774 | // the nul terminator. |
775 | unsafe { self.ptr.read() == 0 } |
776 | } |
777 | } |
778 | |
779 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
780 | impl Iterator for Bytes<'_> { |
781 | type Item = u8; |
782 | |
783 | #[inline] |
784 | fn next(&mut self) -> Option<u8> { |
785 | // SAFETY: We only choose a pointer from a valid C string, which must |
786 | // be non-null and contain at least one value. Since we always stop at |
787 | // the nul terminator, which is guaranteed to exist, we can assume that |
788 | // the pointer is non-null and valid. This lets us safely dereference |
789 | // it and assume that adding 1 will create a new, non-null, valid |
790 | // pointer. |
791 | unsafe { |
792 | let ret = self.ptr.read(); |
793 | if ret == 0 { |
794 | None |
795 | } else { |
796 | self.ptr = self.ptr.add(1); |
797 | Some(ret) |
798 | } |
799 | } |
800 | } |
801 | |
802 | #[inline] |
803 | fn size_hint(&self) -> (usize, Option<usize>) { |
804 | if self.is_empty() { (0, Some(0)) } else { (1, None) } |
805 | } |
806 | |
807 | #[inline] |
808 | fn count(self) -> usize { |
809 | // SAFETY: We always hold a valid pointer to a C string |
810 | unsafe { strlen(self.ptr.as_ptr().cast()) } |
811 | } |
812 | } |
813 | |
814 | #[unstable(feature = "cstr_bytes", issue = "112115")] |
815 | impl FusedIterator for Bytes<'_> {} |
816 |
Definitions
- CStr
- inner
- FromBytesWithNulError
- InteriorNul
- position
- NotNulTerminated
- description
- FromBytesUntilNulError
- fmt
- fmt
- default
- fmt
- position
- from_ptr
- from_bytes_until_nul
- from_bytes_with_nul
- from_bytes_with_nul_unchecked
- as_ptr
- as_non_null_ptr
- count_bytes
- is_empty
- to_bytes
- to_bytes_with_nul
- bytes
- to_str
- display
- partial_cmp
- cmp
- Output
- index
- as_ref
- strlen
- strlen
- Bytes
- ptr
- phantom
- new
- is_empty
- Item
- next
- size_hint
Learn Rust with the experts
Find out more