//! Ways to create a `str` from a byte slice.
| 2 | |
| 3 | use super::Utf8Error; |
| 4 | use super::validations::run_utf8_validation; |
| 5 | use crate::{mem, ptr}; |
| 6 | |
| 7 | /// Converts a slice of bytes to a string slice. |
| 8 | /// |
| 9 | /// This is an alias to [`str::from_utf8`]. |
| 10 | /// |
| 11 | /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice |
| 12 | /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between |
| 13 | /// the two. Not all byte slices are valid string slices, however: [`&str`] requires |
| 14 | /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid |
| 15 | /// UTF-8, and then does the conversion. |
| 16 | /// |
| 17 | /// [`&str`]: str |
| 18 | /// [byteslice]: slice |
| 19 | /// |
| 20 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to |
| 21 | /// incur the overhead of the validity check, there is an unsafe version of |
| 22 | /// this function, [`from_utf8_unchecked`], which has the same |
| 23 | /// behavior but skips the check. |
| 24 | /// |
| 25 | /// If you need a `String` instead of a `&str`, consider |
| 26 | /// [`String::from_utf8`][string]. |
| 27 | /// |
| 28 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
| 29 | /// |
| 30 | /// Because you can stack-allocate a `[u8; N]`, and you can take a |
| 31 | /// [`&[u8]`][byteslice] of it, this function is one way to have a |
| 32 | /// stack-allocated string. There is an example of this in the |
| 33 | /// examples section below. |
| 34 | /// |
| 35 | /// [byteslice]: slice |
| 36 | /// |
| 37 | /// # Errors |
| 38 | /// |
| 39 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
| 40 | /// provided slice is not UTF-8. |
| 41 | /// |
| 42 | /// # Examples |
| 43 | /// |
| 44 | /// Basic usage: |
| 45 | /// |
| 46 | /// ``` |
| 47 | /// use std::str; |
| 48 | /// |
| 49 | /// // some bytes, in a vector |
| 50 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
| 51 | /// |
| 52 | /// // We can use the ? (try) operator to check if the bytes are valid |
| 53 | /// let sparkle_heart = str::from_utf8(&sparkle_heart)?; |
| 54 | /// |
| 55 | /// assert_eq!("💖" , sparkle_heart); |
| 56 | /// # Ok::<_, str::Utf8Error>(()) |
| 57 | /// ``` |
| 58 | /// |
| 59 | /// Incorrect bytes: |
| 60 | /// |
| 61 | /// ``` |
| 62 | /// use std::str; |
| 63 | /// |
| 64 | /// // some invalid bytes, in a vector |
| 65 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
| 66 | /// |
| 67 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); |
| 68 | /// ``` |
| 69 | /// |
| 70 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
| 71 | /// errors that can be returned. |
| 72 | /// |
| 73 | /// A "stack allocated string": |
| 74 | /// |
| 75 | /// ``` |
| 76 | /// use std::str; |
| 77 | /// |
| 78 | /// // some bytes, in a stack-allocated array |
| 79 | /// let sparkle_heart = [240, 159, 146, 150]; |
| 80 | /// |
| 81 | /// // We know these bytes are valid, so just use `unwrap()`. |
| 82 | /// let sparkle_heart: &str = str::from_utf8(&sparkle_heart).unwrap(); |
| 83 | /// |
| 84 | /// assert_eq!("💖" , sparkle_heart); |
| 85 | /// ``` |
| 86 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 87 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
| 88 | #[rustc_diagnostic_item = "str_from_utf8" ] |
| 89 | pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
| 90 | // FIXME(const-hack): This should use `?` again, once it's `const` |
| 91 | match run_utf8_validation(v) { |
| 92 | Ok(_) => { |
| 93 | // SAFETY: validation succeeded. |
| 94 | Ok(unsafe { from_utf8_unchecked(v) }) |
| 95 | } |
| 96 | Err(err: Utf8Error) => Err(err), |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | /// Converts a mutable slice of bytes to a mutable string slice. |
| 101 | /// |
| 102 | /// This is an alias to [`str::from_utf8_mut`]. |
| 103 | /// |
| 104 | /// # Examples |
| 105 | /// |
| 106 | /// Basic usage: |
| 107 | /// |
| 108 | /// ``` |
| 109 | /// use std::str; |
| 110 | /// |
| 111 | /// // "Hello, Rust!" as a mutable vector |
| 112 | /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; |
| 113 | /// |
| 114 | /// // As we know these bytes are valid, we can use `unwrap()` |
| 115 | /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); |
| 116 | /// |
| 117 | /// assert_eq!("Hello, Rust!" , outstr); |
| 118 | /// ``` |
| 119 | /// |
| 120 | /// Incorrect bytes: |
| 121 | /// |
| 122 | /// ``` |
| 123 | /// use std::str; |
| 124 | /// |
| 125 | /// // Some invalid bytes in a mutable vector |
| 126 | /// let mut invalid = vec![128, 223]; |
| 127 | /// |
| 128 | /// assert!(str::from_utf8_mut(&mut invalid).is_err()); |
| 129 | /// ``` |
| 130 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
| 131 | /// errors that can be returned. |
| 132 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
| 133 | #[rustc_const_stable (feature = "const_str_from_utf8" , since = "1.87.0" )] |
| 134 | #[rustc_diagnostic_item = "str_from_utf8_mut" ] |
| 135 | pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { |
| 136 | // FIXME(const-hack): This should use `?` again, once it's `const` |
| 137 | match run_utf8_validation(v) { |
| 138 | Ok(_) => { |
| 139 | // SAFETY: validation succeeded. |
| 140 | Ok(unsafe { from_utf8_unchecked_mut(v) }) |
| 141 | } |
| 142 | Err(err: Utf8Error) => Err(err), |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | /// Converts a slice of bytes to a string slice without checking |
| 147 | /// that the string contains valid UTF-8. |
| 148 | /// |
| 149 | /// This is an alias to [`str::from_utf8_unchecked`]. |
| 150 | /// |
| 151 | /// See the safe version, [`from_utf8`], for more information. |
| 152 | /// |
| 153 | /// # Safety |
| 154 | /// |
| 155 | /// The bytes passed in must be valid UTF-8. |
| 156 | /// |
| 157 | /// # Examples |
| 158 | /// |
| 159 | /// Basic usage: |
| 160 | /// |
| 161 | /// ``` |
| 162 | /// use std::str; |
| 163 | /// |
| 164 | /// // some bytes, in a vector |
| 165 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
| 166 | /// |
| 167 | /// let sparkle_heart = unsafe { |
| 168 | /// str::from_utf8_unchecked(&sparkle_heart) |
| 169 | /// }; |
| 170 | /// |
| 171 | /// assert_eq!("💖" , sparkle_heart); |
| 172 | /// ``` |
| 173 | #[inline ] |
| 174 | #[must_use ] |
| 175 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 176 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked" , since = "1.55.0" )] |
| 177 | #[rustc_diagnostic_item = "str_from_utf8_unchecked" ] |
| 178 | pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
| 179 | // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. |
| 180 | // Also relies on `&str` and `&[u8]` having the same layout. |
| 181 | unsafe { mem::transmute(src:v) } |
| 182 | } |
| 183 | |
| 184 | /// Converts a slice of bytes to a string slice without checking |
| 185 | /// that the string contains valid UTF-8; mutable version. |
| 186 | /// |
| 187 | /// This is an alias to [`str::from_utf8_unchecked_mut`]. |
| 188 | /// |
| 189 | /// See the immutable version, [`from_utf8_unchecked()`] for documentation and safety requirements. |
| 190 | /// |
| 191 | /// # Examples |
| 192 | /// |
| 193 | /// Basic usage: |
| 194 | /// |
| 195 | /// ``` |
| 196 | /// use std::str; |
| 197 | /// |
| 198 | /// let mut heart = vec![240, 159, 146, 150]; |
| 199 | /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; |
| 200 | /// |
| 201 | /// assert_eq!("💖" , heart); |
| 202 | /// ``` |
| 203 | #[inline ] |
| 204 | #[must_use ] |
| 205 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
| 206 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked_mut" , since = "1.83.0" )] |
| 207 | #[rustc_diagnostic_item = "str_from_utf8_unchecked_mut" ] |
| 208 | pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { |
| 209 | // SAFETY: the caller must guarantee that the bytes `v` |
| 210 | // are valid UTF-8, thus the cast to `*mut str` is safe. |
| 211 | // Also, the pointer dereference is safe because that pointer |
| 212 | // comes from a reference which is guaranteed to be valid for writes. |
| 213 | unsafe { &mut *(v as *mut [u8] as *mut str) } |
| 214 | } |
| 215 | |
| 216 | /// Creates a `&str` from a pointer and a length. |
| 217 | /// |
| 218 | /// The pointed-to bytes must be valid UTF-8. |
| 219 | /// If this might not be the case, use `str::from_utf8(slice::from_raw_parts(ptr, len))`, |
| 220 | /// which will return an `Err` if the data isn't valid UTF-8. |
| 221 | /// |
| 222 | /// This function is the `str` equivalent of [`slice::from_raw_parts`](crate::slice::from_raw_parts). |
| 223 | /// See that function's documentation for safety concerns and examples. |
| 224 | /// |
| 225 | /// The mutable version of this function is [`from_raw_parts_mut`]. |
| 226 | #[inline ] |
| 227 | #[must_use ] |
| 228 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
| 229 | pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str { |
| 230 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts`. |
| 231 | unsafe { &*ptr::from_raw_parts(data_pointer:ptr, metadata:len) } |
| 232 | } |
| 233 | |
| 234 | /// Creates a `&mut str` from a pointer and a length. |
| 235 | /// |
| 236 | /// The pointed-to bytes must be valid UTF-8. |
| 237 | /// If this might not be the case, use `str::from_utf8_mut(slice::from_raw_parts_mut(ptr, len))`, |
| 238 | /// which will return an `Err` if the data isn't valid UTF-8. |
| 239 | /// |
| 240 | /// This function is the `str` equivalent of [`slice::from_raw_parts_mut`](crate::slice::from_raw_parts_mut). |
| 241 | /// See that function's documentation for safety concerns and examples. |
| 242 | /// |
| 243 | /// The immutable version of this function is [`from_raw_parts`]. |
| 244 | #[inline ] |
| 245 | #[must_use ] |
| 246 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
| 247 | pub const unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut str { |
| 248 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts_mut`. |
| 249 | unsafe { &mut *ptr::from_raw_parts_mut(data_pointer:ptr, metadata:len) } |
| 250 | } |
| 251 | |