| 1 | //! Utilities related to FFI bindings. |
| 2 | //! |
| 3 | //! This module provides utilities to handle data across non-Rust |
| 4 | //! interfaces, like other programming languages and the underlying |
| 5 | //! operating system. It is mainly of use for FFI (Foreign Function |
| 6 | //! Interface) bindings and code that needs to exchange C-like strings |
| 7 | //! with other languages. |
| 8 | //! |
| 9 | //! # Overview |
| 10 | //! |
| 11 | //! Rust represents owned strings with the [`String`] type, and |
| 12 | //! borrowed slices of strings with the [`str`] primitive. Both are |
| 13 | //! always in UTF-8 encoding, and may contain nul bytes in the middle, |
| 14 | //! i.e., if you look at the bytes that make up the string, there may |
| 15 | //! be a `\0` among them. Both `String` and `str` store their length |
| 16 | //! explicitly; there are no nul terminators at the end of strings |
| 17 | //! like in C. |
| 18 | //! |
| 19 | //! C strings are different from Rust strings: |
| 20 | //! |
| 21 | //! * **Encodings** - Rust strings are UTF-8, but C strings may use |
| 22 | //! other encodings. If you are using a string from C, you should |
| 23 | //! check its encoding explicitly, rather than just assuming that it |
| 24 | //! is UTF-8 like you can do in Rust. |
| 25 | //! |
| 26 | //! * **Character size** - C strings may use `char` or `wchar_t`-sized |
| 27 | //! characters; please **note** that C's `char` is different from Rust's. |
| 28 | //! The C standard leaves the actual sizes of those types open to |
| 29 | //! interpretation, but defines different APIs for strings made up of |
| 30 | //! each character type. Rust strings are always UTF-8, so different |
| 31 | //! Unicode characters will be encoded in a variable number of bytes |
| 32 | //! each. The Rust type [`char`] represents a '[Unicode scalar |
| 33 | //! value]', which is similar to, but not the same as, a '[Unicode |
| 34 | //! code point]'. |
| 35 | //! |
| 36 | //! * **Nul terminators and implicit string lengths** - Often, C |
| 37 | //! strings are nul-terminated, i.e., they have a `\0` character at the |
| 38 | //! end. The length of a string buffer is not stored, but has to be |
| 39 | //! calculated; to compute the length of a string, C code must |
| 40 | //! manually call a function like `strlen()` for `char`-based strings, |
| 41 | //! or `wcslen()` for `wchar_t`-based ones. Those functions return |
| 42 | //! the number of characters in the string excluding the nul |
| 43 | //! terminator, so the buffer length is really `len+1` characters. |
| 44 | //! Rust strings don't have a nul terminator; their length is always |
| 45 | //! stored and does not need to be calculated. While in Rust |
| 46 | //! accessing a string's length is an *O*(1) operation (because the |
| 47 | //! length is stored), in C it is an *O*(*n*) operation because the |
| 48 | //! length needs to be computed by scanning the string for the nul |
| 49 | //! terminator. |
| 50 | //! |
| 51 | //! * **Internal nul characters** - When C strings have a nul |
| 52 | //! terminator character, this usually means that they cannot have nul |
| 53 | //! characters in the middle — a nul character would essentially |
| 54 | //! truncate the string. Rust strings *can* have nul characters in |
| 55 | //! the middle, because nul does not have to mark the end of the |
| 56 | //! string in Rust. |
| 57 | //! |
| 58 | //! # Representations of non-Rust strings |
| 59 | //! |
| 60 | //! [`CString`] and [`CStr`] are useful when you need to transfer |
| 61 | //! UTF-8 strings to and from languages with a C ABI, like Python. |
| 62 | //! |
| 63 | //! * **From Rust to C:** [`CString`] represents an owned, C-friendly |
| 64 | //! string: it is nul-terminated, and has no internal nul characters. |
| 65 | //! Rust code can create a [`CString`] out of a normal string (provided |
| 66 | //! that the string doesn't have nul characters in the middle), and |
| 67 | //! then use a variety of methods to obtain a raw <code>\*mut [u8]</code> that can |
| 68 | //! then be passed as an argument to functions which use the C |
| 69 | //! conventions for strings. |
| 70 | //! |
| 71 | //! * **From C to Rust:** [`CStr`] represents a borrowed C string; it |
| 72 | //! is what you would use to wrap a raw <code>\*const [u8]</code> that you got from |
| 73 | //! a C function. A [`CStr`] is guaranteed to be a nul-terminated array |
| 74 | //! of bytes. Once you have a [`CStr`], you can convert it to a Rust |
| 75 | //! <code>&[str]</code> if it's valid UTF-8, or lossily convert it by adding |
| 76 | //! replacement characters. |
| 77 | //! |
| 78 | //! [`OsString`] and [`OsStr`] are useful when you need to transfer |
| 79 | //! strings to and from the operating system itself, or when capturing |
| 80 | //! the output of external commands. Conversions between [`OsString`], |
| 81 | //! [`OsStr`] and Rust strings work similarly to those for [`CString`] |
| 82 | //! and [`CStr`]. |
| 83 | //! |
| 84 | //! * [`OsString`] losslessly represents an owned platform string. However, this |
| 85 | //! representation is not necessarily in a form native to the platform. |
| 86 | //! In the Rust standard library, various APIs that transfer strings to/from the operating |
| 87 | //! system use [`OsString`] instead of plain strings. For example, |
| 88 | //! [`env::var_os()`] is used to query environment variables; it |
| 89 | //! returns an <code>[Option]<[OsString]></code>. If the environment variable |
| 90 | //! exists you will get a <code>[Some]\(os_string)</code>, which you can |
| 91 | //! *then* try to convert to a Rust string. This yields a [`Result`], so that |
| 92 | //! your code can detect errors in case the environment variable did |
| 93 | //! not in fact contain valid Unicode data. |
| 94 | //! |
| 95 | //! * [`OsStr`] losslessly represents a borrowed reference to a platform string. |
| 96 | //! However, this representation is not necessarily in a form native to the platform. |
| 97 | //! It can be converted into a UTF-8 Rust string slice in a similar way to |
| 98 | //! [`OsString`]. |
| 99 | //! |
| 100 | //! # Conversions |
| 101 | //! |
| 102 | //! ## On Unix |
| 103 | //! |
| 104 | //! On Unix, [`OsStr`] implements the |
| 105 | //! <code>std::os::unix::ffi::[OsStrExt][unix.OsStrExt]</code> trait, which |
| 106 | //! augments it with two methods, [`from_bytes`] and [`as_bytes`]. |
| 107 | //! These do inexpensive conversions from and to byte slices. |
| 108 | //! |
| 109 | //! Additionally, on Unix [`OsString`] implements the |
| 110 | //! <code>std::os::unix::ffi::[OsStringExt][unix.OsStringExt]</code> trait, |
| 111 | //! which provides [`from_vec`] and [`into_vec`] methods that consume |
| 112 | //! their arguments, and take or produce vectors of [`u8`]. |
| 113 | //! |
| 114 | //! ## On Windows |
| 115 | //! |
| 116 | //! An [`OsStr`] can be losslessly converted to a native Windows string. And |
| 117 | //! a native Windows string can be losslessly converted to an [`OsString`]. |
| 118 | //! |
| 119 | //! On Windows, [`OsStr`] implements the |
| 120 | //! <code>std::os::windows::ffi::[OsStrExt][windows.OsStrExt]</code> trait, |
| 121 | //! which provides an [`encode_wide`] method. This provides an |
| 122 | //! iterator that can be [`collect`]ed into a vector of [`u16`]. After a nul |
| 123 | //! character is appended, this is the same as a native Windows string. |
| 124 | //! |
| 125 | //! Additionally, on Windows [`OsString`] implements the |
| 126 | //! <code>std::os::windows::ffi::[OsStringExt][windows.OsStringExt]</code> |
| 127 | //! trait, which provides a [`from_wide`] method to convert a native Windows |
| 128 | //! string (without the terminating nul character) to an [`OsString`]. |
| 129 | //! |
| 130 | //! ## Other platforms |
| 131 | //! |
| 132 | //! Many other platforms provide their own extension traits in a |
| 133 | //! `std::os::*::ffi` module. |
| 134 | //! |
| 135 | //! ## On all platforms |
| 136 | //! |
| 137 | //! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of |
| 138 | //! UTF-8; see [`OsString`] for more details on its encoding on different platforms. |
| 139 | //! |
| 140 | //! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_encoded_bytes`] and |
| 141 | //! [`OsStr::from_encoded_bytes_unchecked`]. |
| 142 | //! |
| 143 | //! For basic string processing, see [`OsStr::slice_encoded_bytes`]. |
| 144 | //! |
| 145 | //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value |
| 146 | //! [Unicode code point]: https://www.unicode.org/glossary/#code_point |
| 147 | //! [`env::set_var()`]: crate::env::set_var "env::set_var" |
| 148 | //! [`env::var_os()`]: crate::env::var_os "env::var_os" |
| 149 | //! [unix.OsStringExt]: crate::os::unix::ffi::OsStringExt "os::unix::ffi::OsStringExt" |
| 150 | //! [`from_vec`]: crate::os::unix::ffi::OsStringExt::from_vec "os::unix::ffi::OsStringExt::from_vec" |
| 151 | //! [`into_vec`]: crate::os::unix::ffi::OsStringExt::into_vec "os::unix::ffi::OsStringExt::into_vec" |
| 152 | //! [unix.OsStrExt]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt" |
| 153 | //! [`from_bytes`]: crate::os::unix::ffi::OsStrExt::from_bytes "os::unix::ffi::OsStrExt::from_bytes" |
| 154 | //! [`as_bytes`]: crate::os::unix::ffi::OsStrExt::as_bytes "os::unix::ffi::OsStrExt::as_bytes" |
| 155 | //! [`OsStrExt`]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt" |
| 156 | //! [windows.OsStrExt]: crate::os::windows::ffi::OsStrExt "os::windows::ffi::OsStrExt" |
| 157 | //! [`encode_wide`]: crate::os::windows::ffi::OsStrExt::encode_wide "os::windows::ffi::OsStrExt::encode_wide" |
| 158 | //! [`collect`]: crate::iter::Iterator::collect "iter::Iterator::collect" |
| 159 | //! [windows.OsStringExt]: crate::os::windows::ffi::OsStringExt "os::windows::ffi::OsStringExt" |
| 160 | //! [`from_wide`]: crate::os::windows::ffi::OsStringExt::from_wide "os::windows::ffi::OsStringExt::from_wide" |
| 161 | |
| 162 | #![stable (feature = "rust1" , since = "1.0.0" )] |
| 163 | |
| 164 | #[stable (feature = "c_str_module" , since = "1.88.0" )] |
| 165 | pub mod c_str; |
| 166 | |
| 167 | #[stable (feature = "core_c_void" , since = "1.30.0" )] |
| 168 | pub use core::ffi::c_void; |
| 169 | #[unstable ( |
| 170 | feature = "c_variadic" , |
| 171 | reason = "the `c_variadic` feature has not been properly tested on \ |
| 172 | all supported platforms" , |
| 173 | issue = "44930" |
| 174 | )] |
| 175 | pub use core::ffi::{VaArgSafe, VaList, VaListImpl}; |
| 176 | #[stable (feature = "core_ffi_c" , since = "1.64.0" )] |
| 177 | pub use core::ffi::{ |
| 178 | c_char, c_double, c_float, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, |
| 179 | c_ulong, c_ulonglong, c_ushort, |
| 180 | }; |
| 181 | #[unstable (feature = "c_size_t" , issue = "88345" )] |
| 182 | pub use core::ffi::{c_ptrdiff_t, c_size_t, c_ssize_t}; |
| 183 | |
| 184 | #[doc (inline)] |
| 185 | #[stable (feature = "cstr_from_bytes_until_nul" , since = "1.69.0" )] |
| 186 | pub use self::c_str::FromBytesUntilNulError; |
| 187 | #[doc (inline)] |
| 188 | #[stable (feature = "cstr_from_bytes" , since = "1.10.0" )] |
| 189 | pub use self::c_str::FromBytesWithNulError; |
| 190 | #[doc (inline)] |
| 191 | #[stable (feature = "cstring_from_vec_with_nul" , since = "1.58.0" )] |
| 192 | pub use self::c_str::FromVecWithNulError; |
| 193 | #[doc (inline)] |
| 194 | #[stable (feature = "cstring_into" , since = "1.7.0" )] |
| 195 | pub use self::c_str::IntoStringError; |
| 196 | #[doc (inline)] |
| 197 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 198 | pub use self::c_str::NulError; |
| 199 | #[doc (inline)] |
| 200 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 201 | pub use self::c_str::{CStr, CString}; |
| 202 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 203 | #[doc (inline)] |
| 204 | pub use self::os_str::{OsStr, OsString}; |
| 205 | |
| 206 | #[stable (feature = "os_str_display" , since = "1.87.0" )] |
| 207 | pub mod os_str; |
| 208 | |