1 | //! [`CStr`] and its related types. |
2 | |
3 | use crate::cmp::Ordering; |
4 | use crate::error::Error; |
5 | use crate::ffi::c_char; |
6 | use crate::intrinsics::const_eval_select; |
7 | use crate::iter::FusedIterator; |
8 | use crate::marker::PhantomData; |
9 | use crate::ptr::NonNull; |
10 | use crate::slice::memchr; |
11 | use crate::{fmt, ops, slice, str}; |
12 | |
13 | // FIXME: because this is doc(inline)d, we *have* to use intra-doc links because the actual link |
14 | // depends on where the item is being documented. however, since this is libcore, we can't |
15 | // actually reference libstd or liballoc in intra-doc links. so, the best we can do is remove the |
16 | // links to `CString` and `String` for now until a solution is developed |
17 | |
18 | /// Representation of a borrowed C string. |
19 | /// |
20 | /// This type represents a borrowed reference to a nul-terminated |
21 | /// array of bytes. It can be constructed safely from a <code>&[[u8]]</code> |
22 | /// slice, or unsafely from a raw `*const c_char`. It can be expressed as a |
23 | /// literal in the form `c"Hello world"`. |
24 | /// |
25 | /// The `CStr` can then be converted to a Rust <code>&[str]</code> by performing |
26 | /// UTF-8 validation, or into an owned `CString`. |
27 | /// |
28 | /// `&CStr` is to `CString` as <code>&[str]</code> is to `String`: the former |
29 | /// in each pair are borrowed references; the latter are owned |
30 | /// strings. |
31 | /// |
32 | /// Note that this structure does **not** have a guaranteed layout (the `repr(transparent)` |
33 | /// notwithstanding) and should not be placed in the signatures of FFI functions. |
34 | /// Instead, safe wrappers of FFI functions may leverage [`CStr::as_ptr`] and the unsafe |
35 | /// [`CStr::from_ptr`] constructor to provide a safe interface to other consumers. |
36 | /// |
37 | /// # Examples |
38 | /// |
39 | /// Inspecting a foreign C string: |
40 | /// |
41 | /// ``` |
42 | /// use std::ffi::CStr; |
43 | /// use std::os::raw::c_char; |
44 | /// |
45 | /// # /* Extern functions are awkward in doc comments - fake it instead |
46 | /// extern "C" { fn my_string() -> *const c_char; } |
47 | /// # */ unsafe extern "C" fn my_string() -> *const c_char { c"hello" .as_ptr() } |
48 | /// |
49 | /// unsafe { |
50 | /// let slice = CStr::from_ptr(my_string()); |
51 | /// println!("string buffer size without nul terminator: {}" , slice.to_bytes().len()); |
52 | /// } |
53 | /// ``` |
54 | /// |
55 | /// Passing a Rust-originating C string: |
56 | /// |
57 | /// ``` |
58 | /// use std::ffi::CStr; |
59 | /// use std::os::raw::c_char; |
60 | /// |
61 | /// fn work(data: &CStr) { |
62 | /// unsafe extern "C" fn work_with(s: *const c_char) {} |
63 | /// unsafe { work_with(data.as_ptr()) } |
64 | /// } |
65 | /// |
66 | /// let s = c"Hello world!" ; |
67 | /// work(&s); |
68 | /// ``` |
69 | /// |
70 | /// Converting a foreign C string into a Rust `String`: |
71 | /// |
72 | /// ``` |
73 | /// use std::ffi::CStr; |
74 | /// use std::os::raw::c_char; |
75 | /// |
76 | /// # /* Extern functions are awkward in doc comments - fake it instead |
77 | /// extern "C" { fn my_string() -> *const c_char; } |
78 | /// # */ unsafe extern "C" fn my_string() -> *const c_char { c"hello" .as_ptr() } |
79 | /// |
80 | /// fn my_string_safe() -> String { |
81 | /// let cstr = unsafe { CStr::from_ptr(my_string()) }; |
82 | /// // Get copy-on-write Cow<'_, str>, then guarantee a freshly-owned String allocation |
83 | /// String::from_utf8_lossy(cstr.to_bytes()).to_string() |
84 | /// } |
85 | /// |
86 | /// println!("string: {}" , my_string_safe()); |
87 | /// ``` |
88 | /// |
89 | /// [str]: prim@str "str" |
#[derive (PartialEq, Eq, Hash)]
#[stable (feature = "core_c_str" , since = "1.64.0" )]
#[rustc_diagnostic_item = "cstr_type" ]
#[rustc_has_incoherent_inherent_impls ]
#[lang = "CStr" ]
// `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies
// on `CStr` being layout-compatible with `[u8]`.
// However, `CStr` layout is considered an implementation detail and must not be relied upon;
// the type-level documentation says so explicitly despite the `repr(transparent)` below.
// NOTE(review): an earlier wording of this comment said the attribute is hidden under
// `cfg(doc)`; as written here it is applied unconditionally. Confirm how it is kept out of
// the rendered documentation.
#[repr (transparent)]
pub struct CStr {
    // FIXME: this should not be represented with a DST slice but rather with
    // just a raw `c_char` along with some form of marker to make
    // this an unsized type. Essentially `sizeof(&CStr)` should be the
    // same as `sizeof(&c_char)` but `CStr` should be an unsized type.
    //
    // The string contents *including* the trailing nul terminator: `count_bytes`
    // subtracts one from this slice's length and `to_bytes_with_nul` reinterprets
    // it as `[u8]` unchanged.
    inner: [c_char],
}
108 | |
/// An error indicating that a nul byte was not in the expected position.
///
/// The slice used to create a [`CStr`] must have one and only one nul byte,
/// positioned at the end.
///
/// This error is created by the [`CStr::from_bytes_with_nul`] method.
/// See its documentation for more.
///
/// # Examples
///
/// ```
/// use std::ffi::{CStr, FromBytesWithNulError};
///
/// let _: FromBytesWithNulError = CStr::from_bytes_with_nul(b"f\0oo").unwrap_err();
/// ```
#[derive (Clone, Copy, PartialEq, Eq, Debug)]
#[stable (feature = "core_c_str" , since = "1.64.0" )]
pub enum FromBytesWithNulError {
    /// Data provided contains an interior nul byte at byte `position`.
    ///
    /// Returned when a nul byte is found anywhere other than the last byte
    /// of the input slice.
    InteriorNul {
        /// The position of the interior nul byte, as an index into the input slice.
        position: usize,
    },
    /// Data provided is not nul terminated.
    ///
    /// Returned when the input slice contains no nul byte at all.
    NotNulTerminated,
}
135 | |
136 | #[stable (feature = "frombyteswithnulerror_impls" , since = "1.17.0" )] |
137 | impl Error for FromBytesWithNulError { |
138 | #[allow (deprecated)] |
139 | fn description(&self) -> &str { |
140 | match self { |
141 | Self::InteriorNul { .. } => "data provided contains an interior nul byte" , |
142 | Self::NotNulTerminated => "data provided is not nul terminated" , |
143 | } |
144 | } |
145 | } |
146 | |
/// An error indicating that no nul byte was present.
///
/// A slice used to create a [`CStr`] must contain a nul byte somewhere
/// within the slice.
///
/// This error is created by the [`CStr::from_bytes_until_nul`] method.
#[derive (Clone, PartialEq, Eq, Debug)]
#[stable (feature = "cstr_from_bytes_until_nul" , since = "1.69.0" )]
// The single field is a private unit value: the error carries no data, and code
// outside this module cannot construct it directly.
pub struct FromBytesUntilNulError(());
157 | |
158 | #[stable (feature = "cstr_from_bytes_until_nul" , since = "1.69.0" )] |
159 | impl fmt::Display for FromBytesUntilNulError { |
160 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
161 | write!(f, "data provided does not contain a nul" ) |
162 | } |
163 | } |
164 | |
165 | #[stable (feature = "cstr_debug" , since = "1.3.0" )] |
166 | impl fmt::Debug for CStr { |
167 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
168 | write!(f, " \"{}\"" , self.to_bytes().escape_ascii()) |
169 | } |
170 | } |
171 | |
172 | #[stable (feature = "cstr_default" , since = "1.10.0" )] |
173 | impl Default for &CStr { |
174 | #[inline ] |
175 | fn default() -> Self { |
176 | const SLICE: &[c_char] = &[0]; |
177 | // SAFETY: `SLICE` is indeed pointing to a valid nul-terminated string. |
178 | unsafe { CStr::from_ptr(SLICE.as_ptr()) } |
179 | } |
180 | } |
181 | |
182 | #[stable (feature = "frombyteswithnulerror_impls" , since = "1.17.0" )] |
183 | impl fmt::Display for FromBytesWithNulError { |
184 | #[allow (deprecated, deprecated_in_future)] |
185 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
186 | f.write_str(self.description())?; |
187 | if let Self::InteriorNul { position: &usize } = self { |
188 | write!(f, " at byte pos {position}" )?; |
189 | } |
190 | Ok(()) |
191 | } |
192 | } |
193 | |
194 | impl CStr { |
195 | /// Wraps a raw C string with a safe C string wrapper. |
196 | /// |
197 | /// This function will wrap the provided `ptr` with a `CStr` wrapper, which |
198 | /// allows inspection and interoperation of non-owned C strings. The total |
199 | /// size of the terminated buffer must be smaller than [`isize::MAX`] **bytes** |
200 | /// in memory (a restriction from [`slice::from_raw_parts`]). |
201 | /// |
202 | /// # Safety |
203 | /// |
204 | /// * The memory pointed to by `ptr` must contain a valid nul terminator at the |
205 | /// end of the string. |
206 | /// |
207 | /// * `ptr` must be [valid] for reads of bytes up to and including the nul terminator. |
208 | /// This means in particular: |
209 | /// |
210 | /// * The entire memory range of this `CStr` must be contained within a single allocated object! |
211 | /// * `ptr` must be non-null even for a zero-length cstr. |
212 | /// |
213 | /// * The memory referenced by the returned `CStr` must not be mutated for |
214 | /// the duration of lifetime `'a`. |
215 | /// |
216 | /// * The nul terminator must be within `isize::MAX` from `ptr` |
217 | /// |
218 | /// > **Note**: This operation is intended to be a 0-cost cast but it is |
219 | /// > currently implemented with an up-front calculation of the length of |
220 | /// > the string. This is not guaranteed to always be the case. |
221 | /// |
222 | /// # Caveat |
223 | /// |
224 | /// The lifetime for the returned slice is inferred from its usage. To prevent accidental misuse, |
225 | /// it's suggested to tie the lifetime to whichever source lifetime is safe in the context, |
226 | /// such as by providing a helper function taking the lifetime of a host value for the slice, |
227 | /// or by explicit annotation. |
228 | /// |
229 | /// # Examples |
230 | /// |
231 | /// ``` |
232 | /// use std::ffi::{c_char, CStr}; |
233 | /// |
234 | /// fn my_string() -> *const c_char { |
235 | /// c"hello" .as_ptr() |
236 | /// } |
237 | /// |
238 | /// unsafe { |
239 | /// let slice = CStr::from_ptr(my_string()); |
240 | /// assert_eq!(slice.to_str().unwrap(), "hello" ); |
241 | /// } |
242 | /// ``` |
243 | /// |
244 | /// ``` |
245 | /// use std::ffi::{c_char, CStr}; |
246 | /// |
247 | /// const HELLO_PTR: *const c_char = { |
248 | /// const BYTES: &[u8] = b"Hello, world! \0" ; |
249 | /// BYTES.as_ptr().cast() |
250 | /// }; |
251 | /// const HELLO: &CStr = unsafe { CStr::from_ptr(HELLO_PTR) }; |
252 | /// |
253 | /// assert_eq!(c"Hello, world!" , HELLO); |
254 | /// ``` |
255 | /// |
256 | /// [valid]: core::ptr#safety |
257 | #[inline ] // inline is necessary for codegen to see strlen. |
258 | #[must_use ] |
259 | #[stable (feature = "rust1" , since = "1.0.0" )] |
260 | #[rustc_const_stable (feature = "const_cstr_from_ptr" , since = "1.81.0" )] |
261 | pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { |
262 | // SAFETY: The caller has provided a pointer that points to a valid C |
263 | // string with a NUL terminator less than `isize::MAX` from `ptr`. |
264 | let len = unsafe { strlen(ptr) }; |
265 | |
266 | // SAFETY: The caller has provided a valid pointer with length less than |
267 | // `isize::MAX`, so `from_raw_parts` is safe. The content remains valid |
268 | // and doesn't change for the lifetime of the returned `CStr`. This |
269 | // means the call to `from_bytes_with_nul_unchecked` is correct. |
270 | // |
271 | // The cast from c_char to u8 is ok because a c_char is always one byte. |
272 | unsafe { Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1)) } |
273 | } |
274 | |
275 | /// Creates a C string wrapper from a byte slice with any number of nuls. |
276 | /// |
277 | /// This method will create a `CStr` from any byte slice that contains at |
278 | /// least one nul byte. Unlike with [`CStr::from_bytes_with_nul`], the caller |
279 | /// does not need to know where the nul byte is located. |
280 | /// |
281 | /// If the first byte is a nul character, this method will return an |
282 | /// empty `CStr`. If multiple nul characters are present, the `CStr` will |
283 | /// end at the first one. |
284 | /// |
285 | /// If the slice only has a single nul byte at the end, this method is |
286 | /// equivalent to [`CStr::from_bytes_with_nul`]. |
287 | /// |
288 | /// # Examples |
289 | /// ``` |
290 | /// use std::ffi::CStr; |
291 | /// |
292 | /// let mut buffer = [0u8; 16]; |
293 | /// unsafe { |
294 | /// // Here we might call an unsafe C function that writes a string |
295 | /// // into the buffer. |
296 | /// let buf_ptr = buffer.as_mut_ptr(); |
297 | /// buf_ptr.write_bytes(b'A' , 8); |
298 | /// } |
299 | /// // Attempt to extract a C nul-terminated string from the buffer. |
300 | /// let c_str = CStr::from_bytes_until_nul(&buffer[..]).unwrap(); |
301 | /// assert_eq!(c_str.to_str().unwrap(), "AAAAAAAA" ); |
302 | /// ``` |
303 | /// |
304 | #[stable (feature = "cstr_from_bytes_until_nul" , since = "1.69.0" )] |
305 | #[rustc_const_stable (feature = "cstr_from_bytes_until_nul" , since = "1.69.0" )] |
306 | pub const fn from_bytes_until_nul(bytes: &[u8]) -> Result<&CStr, FromBytesUntilNulError> { |
307 | let nul_pos = memchr::memchr(0, bytes); |
308 | match nul_pos { |
309 | Some(nul_pos) => { |
310 | // FIXME(const-hack) replace with range index |
311 | // SAFETY: nul_pos + 1 <= bytes.len() |
312 | let subslice = unsafe { crate::slice::from_raw_parts(bytes.as_ptr(), nul_pos + 1) }; |
313 | // SAFETY: We know there is a nul byte at nul_pos, so this slice |
314 | // (ending at the nul byte) is a well-formed C string. |
315 | Ok(unsafe { CStr::from_bytes_with_nul_unchecked(subslice) }) |
316 | } |
317 | None => Err(FromBytesUntilNulError(())), |
318 | } |
319 | } |
320 | |
321 | /// Creates a C string wrapper from a byte slice with exactly one nul |
322 | /// terminator. |
323 | /// |
324 | /// This function will cast the provided `bytes` to a `CStr` |
325 | /// wrapper after ensuring that the byte slice is nul-terminated |
326 | /// and does not contain any interior nul bytes. |
327 | /// |
328 | /// If the nul byte may not be at the end, |
329 | /// [`CStr::from_bytes_until_nul`] can be used instead. |
330 | /// |
331 | /// # Examples |
332 | /// |
333 | /// ``` |
334 | /// use std::ffi::CStr; |
335 | /// |
336 | /// let cstr = CStr::from_bytes_with_nul(b"hello \0" ); |
337 | /// assert_eq!(cstr, Ok(c"hello" )); |
338 | /// ``` |
339 | /// |
340 | /// Creating a `CStr` without a trailing nul terminator is an error: |
341 | /// |
342 | /// ``` |
343 | /// use std::ffi::{CStr, FromBytesWithNulError}; |
344 | /// |
345 | /// let cstr = CStr::from_bytes_with_nul(b"hello" ); |
346 | /// assert_eq!(cstr, Err(FromBytesWithNulError::NotNulTerminated)); |
347 | /// ``` |
348 | /// |
349 | /// Creating a `CStr` with an interior nul byte is an error: |
350 | /// |
351 | /// ``` |
352 | /// use std::ffi::{CStr, FromBytesWithNulError}; |
353 | /// |
354 | /// let cstr = CStr::from_bytes_with_nul(b"he \0llo \0" ); |
355 | /// assert_eq!(cstr, Err(FromBytesWithNulError::InteriorNul { position: 2 })); |
356 | /// ``` |
357 | #[stable (feature = "cstr_from_bytes" , since = "1.10.0" )] |
358 | #[rustc_const_stable (feature = "const_cstr_methods" , since = "1.72.0" )] |
359 | pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, FromBytesWithNulError> { |
360 | let nul_pos = memchr::memchr(0, bytes); |
361 | match nul_pos { |
362 | Some(nul_pos) if nul_pos + 1 == bytes.len() => { |
363 | // SAFETY: We know there is only one nul byte, at the end |
364 | // of the byte slice. |
365 | Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) |
366 | } |
367 | Some(position) => Err(FromBytesWithNulError::InteriorNul { position }), |
368 | None => Err(FromBytesWithNulError::NotNulTerminated), |
369 | } |
370 | } |
371 | |
372 | /// Unsafely creates a C string wrapper from a byte slice. |
373 | /// |
374 | /// This function will cast the provided `bytes` to a `CStr` wrapper without |
375 | /// performing any sanity checks. |
376 | /// |
377 | /// # Safety |
378 | /// The provided slice **must** be nul-terminated and not contain any interior |
379 | /// nul bytes. |
380 | /// |
381 | /// # Examples |
382 | /// |
383 | /// ``` |
384 | /// use std::ffi::CStr; |
385 | /// |
386 | /// let bytes = b"Hello world! \0" ; |
387 | /// |
388 | /// let cstr = unsafe { CStr::from_bytes_with_nul_unchecked(bytes) }; |
389 | /// assert_eq!(cstr.to_bytes_with_nul(), bytes); |
390 | /// ``` |
#[inline ]
#[must_use ]
#[stable (feature = "cstr_from_bytes" , since = "1.10.0" )]
#[rustc_const_stable (feature = "const_cstr_unchecked" , since = "1.59.0" )]
#[rustc_allow_const_fn_unstable (const_eval_select)]
pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr {
    // Two bodies are supplied. The `if const` arm validates the input in full
    // (terminator present, no interior nul) via assertions; the `else` arm only
    // debug-asserts the terminator. Both end with the same pointer cast.
    // NOTE(review): presumably the first arm is the one used during compile-time
    // evaluation and the second at runtime, as the inner comments suggest.
    const_eval_select!(
        @capture { bytes: &[u8] } -> &CStr:
        if const {
            // Saturating so that an empty slice panics in the assert with a good
            // message, not here due to underflow.
            let mut i = bytes.len().saturating_sub(1);
            assert!(!bytes.is_empty() && bytes[i] == 0, "input was not nul-terminated" );

            // Ending nul byte exists, skip to the rest.
            // Walks backwards from the byte before the terminator down to index 0.
            while i != 0 {
                i -= 1;
                let byte = bytes[i];
                assert!(byte != 0, "input contained interior nul" );
            }

            // SAFETY: See runtime cast comment below.
            unsafe { &*(bytes as *const [u8] as *const CStr) }
        } else {
            // Chance at catching some UB at runtime with debug builds.
            // Only the trailing nul is checked here; interior nuls are not detected.
            debug_assert!(!bytes.is_empty() && bytes[bytes.len() - 1] == 0);

            // SAFETY: Casting to CStr is safe because its internal representation
            // is a [u8] too (safe only inside std).
            // (The field is declared as `[c_char]`; `c_char` is always one byte.)
            // Dereferencing the obtained pointer is safe because it comes from a
            // reference. Making a reference is then safe because its lifetime
            // is bound by the lifetime of the given `bytes`.
            unsafe { &*(bytes as *const [u8] as *const CStr) }
        }
    )
}
427 | |
428 | /// Returns the inner pointer to this C string. |
429 | /// |
430 | /// The returned pointer will be valid for as long as `self` is, and points |
431 | /// to a contiguous region of memory terminated with a 0 byte to represent |
432 | /// the end of the string. |
433 | /// |
434 | /// The type of the returned pointer is |
435 | /// [`*const c_char`][crate::ffi::c_char], and whether it's |
436 | /// an alias for `*const i8` or `*const u8` is platform-specific. |
437 | /// |
438 | /// **WARNING** |
439 | /// |
440 | /// The returned pointer is read-only; writing to it (including passing it |
441 | /// to C code that writes to it) causes undefined behavior. |
442 | /// |
443 | /// It is your responsibility to make sure that the underlying memory is not |
444 | /// freed too early. For example, the following code will cause undefined |
445 | /// behavior when `ptr` is used inside the `unsafe` block: |
446 | /// |
447 | /// ```no_run |
448 | /// # #![expect (dangling_pointers_from_temporaries)] |
449 | /// use std::ffi::{CStr, CString}; |
450 | /// |
451 | /// // 💀 The meaning of this entire program is undefined, |
452 | /// // 💀 and nothing about its behavior is guaranteed, |
453 | /// // 💀 not even that its behavior resembles the code as written, |
454 | /// // 💀 just because it contains a single instance of undefined behavior! |
455 | /// |
456 | /// // 🚨 creates a dangling pointer to a temporary `CString` |
457 | /// // 🚨 that is deallocated at the end of the statement |
458 | /// let ptr = CString::new("Hi!" .to_uppercase()).unwrap().as_ptr(); |
459 | /// |
460 | /// // without undefined behavior, you would expect that `ptr` equals: |
461 | /// dbg!(CStr::from_bytes_with_nul(b"HI! \0" ).unwrap()); |
462 | /// |
463 | /// // 🙏 Possibly the program behaved as expected so far, |
464 | /// // 🙏 and this just shows `ptr` is now garbage..., but |
465 | /// // 💀 this violates `CStr::from_ptr`'s safety contract |
466 | /// // 💀 leading to a dereference of a dangling pointer, |
467 | /// // 💀 which is immediate undefined behavior. |
/// // 💀 *BOOM*, you're dead, your entire program has no meaning.
469 | /// dbg!(unsafe { CStr::from_ptr(ptr) }); |
470 | /// ``` |
471 | /// |
/// This happens because the pointer returned by `as_ptr` does not carry any
473 | /// lifetime information, and the `CString` is deallocated immediately after |
474 | /// the expression that it is part of has been evaluated. |
475 | /// To fix the problem, bind the `CString` to a local variable: |
476 | /// |
477 | /// ``` |
478 | /// use std::ffi::{CStr, CString}; |
479 | /// |
480 | /// let c_str = CString::new("Hi!" .to_uppercase()).unwrap(); |
481 | /// let ptr = c_str.as_ptr(); |
482 | /// |
483 | /// assert_eq!(unsafe { CStr::from_ptr(ptr) }, c"HI!" ); |
484 | /// ``` |
485 | #[inline ] |
486 | #[must_use ] |
487 | #[stable (feature = "rust1" , since = "1.0.0" )] |
488 | #[rustc_const_stable (feature = "const_str_as_ptr" , since = "1.32.0" )] |
489 | #[rustc_as_ptr] |
490 | #[rustc_never_returns_null_ptr ] |
491 | pub const fn as_ptr(&self) -> *const c_char { |
492 | self.inner.as_ptr() |
493 | } |
494 | |
495 | /// We could eventually expose this publicly, if we wanted. |
496 | #[inline ] |
497 | #[must_use ] |
498 | const fn as_non_null_ptr(&self) -> NonNull<c_char> { |
499 | // FIXME(const_trait_impl) replace with `NonNull::from` |
500 | // SAFETY: a reference is never null |
501 | unsafe { NonNull::new_unchecked(&self.inner as *const [c_char] as *mut [c_char]) } |
502 | .as_non_null_ptr() |
503 | } |
504 | |
505 | /// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator. |
506 | /// |
507 | /// > **Note**: This method is currently implemented as a constant-time |
508 | /// > cast, but it is planned to alter its definition in the future to |
509 | /// > perform the length calculation whenever this method is called. |
510 | /// |
511 | /// # Examples |
512 | /// |
513 | /// ``` |
514 | /// use std::ffi::CStr; |
515 | /// |
516 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" ).unwrap(); |
517 | /// assert_eq!(cstr.count_bytes(), 3); |
518 | /// |
519 | /// let cstr = CStr::from_bytes_with_nul(b" \0" ).unwrap(); |
520 | /// assert_eq!(cstr.count_bytes(), 0); |
521 | /// ``` |
522 | #[inline ] |
523 | #[must_use ] |
524 | #[doc (alias("len" , "strlen" ))] |
525 | #[stable (feature = "cstr_count_bytes" , since = "1.79.0" )] |
526 | #[rustc_const_stable (feature = "const_cstr_from_ptr" , since = "1.81.0" )] |
527 | pub const fn count_bytes(&self) -> usize { |
528 | self.inner.len() - 1 |
529 | } |
530 | |
531 | /// Returns `true` if `self.to_bytes()` has a length of 0. |
532 | /// |
533 | /// # Examples |
534 | /// |
535 | /// ``` |
536 | /// use std::ffi::CStr; |
537 | /// # use std::ffi::FromBytesWithNulError; |
538 | /// |
539 | /// # fn main() { test().unwrap(); } |
540 | /// # fn test() -> Result<(), FromBytesWithNulError> { |
541 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" )?; |
542 | /// assert!(!cstr.is_empty()); |
543 | /// |
544 | /// let empty_cstr = CStr::from_bytes_with_nul(b" \0" )?; |
545 | /// assert!(empty_cstr.is_empty()); |
546 | /// assert!(c"" .is_empty()); |
547 | /// # Ok(()) |
548 | /// # } |
549 | /// ``` |
550 | #[inline ] |
551 | #[stable (feature = "cstr_is_empty" , since = "1.71.0" )] |
552 | #[rustc_const_stable (feature = "cstr_is_empty" , since = "1.71.0" )] |
553 | pub const fn is_empty(&self) -> bool { |
554 | // SAFETY: We know there is at least one byte; for empty strings it |
555 | // is the NUL terminator. |
556 | // FIXME(const-hack): use get_unchecked |
557 | unsafe { *self.inner.as_ptr() == 0 } |
558 | } |
559 | |
560 | /// Converts this C string to a byte slice. |
561 | /// |
562 | /// The returned slice will **not** contain the trailing nul terminator that this C |
563 | /// string has. |
564 | /// |
565 | /// > **Note**: This method is currently implemented as a constant-time |
566 | /// > cast, but it is planned to alter its definition in the future to |
567 | /// > perform the length calculation whenever this method is called. |
568 | /// |
569 | /// # Examples |
570 | /// |
571 | /// ``` |
572 | /// use std::ffi::CStr; |
573 | /// |
574 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" ).expect("CStr::from_bytes_with_nul failed" ); |
575 | /// assert_eq!(cstr.to_bytes(), b"foo" ); |
576 | /// ``` |
577 | #[inline ] |
578 | #[must_use = "this returns the result of the operation, \ |
579 | without modifying the original" ] |
580 | #[stable (feature = "rust1" , since = "1.0.0" )] |
581 | #[rustc_const_stable (feature = "const_cstr_methods" , since = "1.72.0" )] |
582 | pub const fn to_bytes(&self) -> &[u8] { |
583 | let bytes = self.to_bytes_with_nul(); |
584 | // FIXME(const-hack) replace with range index |
585 | // SAFETY: to_bytes_with_nul returns slice with length at least 1 |
586 | unsafe { slice::from_raw_parts(bytes.as_ptr(), bytes.len() - 1) } |
587 | } |
588 | |
589 | /// Converts this C string to a byte slice containing the trailing 0 byte. |
590 | /// |
591 | /// This function is the equivalent of [`CStr::to_bytes`] except that it |
592 | /// will retain the trailing nul terminator instead of chopping it off. |
593 | /// |
594 | /// > **Note**: This method is currently implemented as a 0-cost cast, but |
595 | /// > it is planned to alter its definition in the future to perform the |
596 | /// > length calculation whenever this method is called. |
597 | /// |
598 | /// # Examples |
599 | /// |
600 | /// ``` |
601 | /// use std::ffi::CStr; |
602 | /// |
603 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" ).expect("CStr::from_bytes_with_nul failed" ); |
604 | /// assert_eq!(cstr.to_bytes_with_nul(), b"foo \0" ); |
605 | /// ``` |
606 | #[inline ] |
607 | #[must_use = "this returns the result of the operation, \ |
608 | without modifying the original" ] |
609 | #[stable (feature = "rust1" , since = "1.0.0" )] |
610 | #[rustc_const_stable (feature = "const_cstr_methods" , since = "1.72.0" )] |
611 | pub const fn to_bytes_with_nul(&self) -> &[u8] { |
612 | // SAFETY: Transmuting a slice of `c_char`s to a slice of `u8`s |
613 | // is safe on all supported targets. |
614 | unsafe { &*((&raw const self.inner) as *const [u8]) } |
615 | } |
616 | |
617 | /// Iterates over the bytes in this C string. |
618 | /// |
619 | /// The returned iterator will **not** contain the trailing nul terminator |
620 | /// that this C string has. |
621 | /// |
622 | /// # Examples |
623 | /// |
624 | /// ``` |
625 | /// #![feature(cstr_bytes)] |
626 | /// use std::ffi::CStr; |
627 | /// |
628 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" ).expect("CStr::from_bytes_with_nul failed" ); |
629 | /// assert!(cstr.bytes().eq(*b"foo" )); |
630 | /// ``` |
631 | #[inline ] |
632 | #[unstable (feature = "cstr_bytes" , issue = "112115" )] |
633 | pub fn bytes(&self) -> Bytes<'_> { |
634 | Bytes::new(self) |
635 | } |
636 | |
637 | /// Yields a <code>&[str]</code> slice if the `CStr` contains valid UTF-8. |
638 | /// |
639 | /// If the contents of the `CStr` are valid UTF-8 data, this |
640 | /// function will return the corresponding <code>&[str]</code> slice. Otherwise, |
641 | /// it will return an error with details of where UTF-8 validation failed. |
642 | /// |
643 | /// [str]: prim@str "str" |
644 | /// |
645 | /// # Examples |
646 | /// |
647 | /// ``` |
648 | /// use std::ffi::CStr; |
649 | /// |
650 | /// let cstr = CStr::from_bytes_with_nul(b"foo \0" ).expect("CStr::from_bytes_with_nul failed" ); |
651 | /// assert_eq!(cstr.to_str(), Ok("foo" )); |
652 | /// ``` |
653 | #[stable (feature = "cstr_to_str" , since = "1.4.0" )] |
654 | #[rustc_const_stable (feature = "const_cstr_methods" , since = "1.72.0" )] |
655 | pub const fn to_str(&self) -> Result<&str, str::Utf8Error> { |
656 | // N.B., when `CStr` is changed to perform the length check in `.to_bytes()` |
657 | // instead of in `from_ptr()`, it may be worth considering if this should |
658 | // be rewritten to do the UTF-8 check inline with the length calculation |
659 | // instead of doing it afterwards. |
660 | str::from_utf8(self.to_bytes()) |
661 | } |
662 | } |
663 | |
664 | // `.to_bytes()` representations are compared instead of the inner `[c_char]`s, |
665 | // because `c_char` is `i8` (not `u8`) on some platforms. |
666 | // That is why this is implemented manually and not derived. |
667 | #[stable (feature = "rust1" , since = "1.0.0" )] |
668 | impl PartialOrd for CStr { |
669 | #[inline ] |
670 | fn partial_cmp(&self, other: &CStr) -> Option<Ordering> { |
671 | self.to_bytes().partial_cmp(&other.to_bytes()) |
672 | } |
673 | } |
674 | #[stable (feature = "rust1" , since = "1.0.0" )] |
675 | impl Ord for CStr { |
676 | #[inline ] |
677 | fn cmp(&self, other: &CStr) -> Ordering { |
678 | self.to_bytes().cmp(&other.to_bytes()) |
679 | } |
680 | } |
681 | |
682 | #[stable (feature = "cstr_range_from" , since = "1.47.0" )] |
683 | impl ops::Index<ops::RangeFrom<usize>> for CStr { |
684 | type Output = CStr; |
685 | |
686 | #[inline ] |
687 | fn index(&self, index: ops::RangeFrom<usize>) -> &CStr { |
688 | let bytes: &[u8] = self.to_bytes_with_nul(); |
689 | // we need to manually check the starting index to account for the null |
690 | // byte, since otherwise we could get an empty string that doesn't end |
691 | // in a null. |
692 | if index.start < bytes.len() { |
693 | // SAFETY: Non-empty tail of a valid `CStr` is still a valid `CStr`. |
694 | unsafe { CStr::from_bytes_with_nul_unchecked(&bytes[index.start..]) } |
695 | } else { |
696 | panic!( |
697 | "index out of bounds: the len is {} but the index is {}" , |
698 | bytes.len(), |
699 | index.start |
700 | ); |
701 | } |
702 | } |
703 | } |
704 | |
#[stable (feature = "cstring_asref" , since = "1.7.0" )]
impl AsRef<CStr> for CStr {
    /// Returns `self` unchanged, so a `&CStr` can be passed wherever an
    /// `AsRef<CStr>` is accepted.
    #[inline ]
    fn as_ref(&self) -> &CStr {
        self
    }
}
712 | |
/// Calculate the length of a nul-terminated string. Defers to C's `strlen` when possible.
///
/// Returns the number of bytes that precede the first nul byte reachable from `ptr`.
///
/// # Safety
///
/// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be
/// located within `isize::MAX` from `ptr`.
#[inline ]
#[unstable (feature = "cstr_internals" , issue = "none" )]
#[rustc_allow_const_fn_unstable (const_eval_select)]
const unsafe fn strlen(ptr: *const c_char) -> usize {
    // Two bodies are supplied: a plain byte-by-byte loop in the `if const` arm,
    // and a call to the external C `strlen` in the `else` arm.
    // NOTE(review): presumably the loop is what runs during compile-time
    // evaluation, where the extern function is unavailable. Confirm against the
    // `const_eval_select!` documentation.
    const_eval_select!(
        @capture { s: *const c_char = ptr } -> usize:
        if const {
            let mut len = 0;

            // SAFETY: Outer caller has provided a pointer to a valid C string.
            // The loop stops at the first nul, so it never reads past the terminator.
            while unsafe { *s.add(len) } != 0 {
                len += 1;
            }

            len
        } else {
            // Declared locally, so inside this arm `strlen` refers to the C
            // function rather than to the enclosing Rust function.
            unsafe extern "C" {
                /// Provided by libc or compiler_builtins.
                fn strlen(s: *const c_char) -> usize;
            }

            // SAFETY: Outer caller has provided a pointer to a valid C string.
            unsafe { strlen(s) }
        }
    )
}
745 | |
/// An iterator over the bytes of a [`CStr`], without the nul terminator.
///
/// This struct is created by the [`bytes`] method on [`CStr`].
/// See its documentation for more.
///
/// [`bytes`]: CStr::bytes
#[must_use = "iterators are lazy and do nothing unless consumed" ]
#[unstable (feature = "cstr_bytes" , issue = "112115" )]
#[derive (Clone, Debug)]
pub struct Bytes<'a> {
    // since we know the string is nul-terminated, we only need one pointer
    //
    // Points at the next byte to yield. `next` advances it by one per yielded
    // byte and leaves it on the nul terminator once the iterator is exhausted.
    ptr: NonNull<u8>,
    // Ties the iterator to the lifetime of the borrowed string data without
    // storing a reference.
    phantom: PhantomData<&'a [c_char]>,
}
760 | |
// SAFETY (review note): `Bytes` only ever reads through `ptr` (see `next`,
// `is_empty` and `count`) and is tied to a shared `&'a [c_char]` borrow via
// `phantom`, so it presumably behaves like a `&[u8]` with respect to sending
// across threads. The raw `NonNull` is what suppresses the automatic impl.
#[unstable (feature = "cstr_bytes" , issue = "112115" )]
unsafe impl Send for Bytes<'_> {}
763 | |
// SAFETY (review note): no method taking `&self` writes through `ptr`
// (`is_empty` only reads one byte), so sharing a `&Bytes` between threads
// presumably cannot cause a data race on the borrowed string data.
#[unstable (feature = "cstr_bytes" , issue = "112115" )]
unsafe impl Sync for Bytes<'_> {}
766 | |
767 | impl<'a> Bytes<'a> { |
768 | #[inline ] |
769 | fn new(s: &'a CStr) -> Self { |
770 | Self { ptr: s.as_non_null_ptr().cast(), phantom: PhantomData } |
771 | } |
772 | |
773 | #[inline ] |
774 | fn is_empty(&self) -> bool { |
775 | // SAFETY: We uphold that the pointer is always valid to dereference |
776 | // by starting with a valid C string and then never incrementing beyond |
777 | // the nul terminator. |
778 | unsafe { self.ptr.read() == 0 } |
779 | } |
780 | } |
781 | |
782 | #[unstable (feature = "cstr_bytes" , issue = "112115" )] |
783 | impl Iterator for Bytes<'_> { |
784 | type Item = u8; |
785 | |
786 | #[inline ] |
787 | fn next(&mut self) -> Option<u8> { |
788 | // SAFETY: We only choose a pointer from a valid C string, which must |
789 | // be non-null and contain at least one value. Since we always stop at |
790 | // the nul terminator, which is guaranteed to exist, we can assume that |
791 | // the pointer is non-null and valid. This lets us safely dereference |
792 | // it and assume that adding 1 will create a new, non-null, valid |
793 | // pointer. |
794 | unsafe { |
795 | let ret = self.ptr.read(); |
796 | if ret == 0 { |
797 | None |
798 | } else { |
799 | self.ptr = self.ptr.add(1); |
800 | Some(ret) |
801 | } |
802 | } |
803 | } |
804 | |
805 | #[inline ] |
806 | fn size_hint(&self) -> (usize, Option<usize>) { |
807 | if self.is_empty() { (0, Some(0)) } else { (1, None) } |
808 | } |
809 | |
810 | #[inline ] |
811 | fn count(self) -> usize { |
812 | // SAFETY: We always hold a valid pointer to a C string |
813 | unsafe { strlen(self.ptr.as_ptr().cast()) } |
814 | } |
815 | } |
816 | |
// `next` returns `None` without advancing once it reaches the nul terminator,
// so every later call keeps returning `None`.
#[unstable (feature = "cstr_bytes" , issue = "112115" )]
impl FusedIterator for Bytes<'_> {}
819 | |