//! Ways to create a `str` from a byte slice.
2 | |
3 | use super::Utf8Error; |
4 | use super::validations::run_utf8_validation; |
5 | use crate::{mem, ptr}; |
6 | |
7 | /// Converts a slice of bytes to a string slice. |
8 | /// |
9 | /// This is an alias to [`str::from_utf8`]. |
10 | /// |
11 | /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice |
12 | /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between |
13 | /// the two. Not all byte slices are valid string slices, however: [`&str`] requires |
14 | /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid |
15 | /// UTF-8, and then does the conversion. |
16 | /// |
17 | /// [`&str`]: str |
18 | /// [byteslice]: slice |
19 | /// |
20 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to |
21 | /// incur the overhead of the validity check, there is an unsafe version of |
22 | /// this function, [`from_utf8_unchecked`], which has the same |
23 | /// behavior but skips the check. |
24 | /// |
25 | /// If you need a `String` instead of a `&str`, consider |
26 | /// [`String::from_utf8`][string]. |
27 | /// |
28 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
29 | /// |
30 | /// Because you can stack-allocate a `[u8; N]`, and you can take a |
31 | /// [`&[u8]`][byteslice] of it, this function is one way to have a |
32 | /// stack-allocated string. There is an example of this in the |
33 | /// examples section below. |
34 | /// |
35 | /// [byteslice]: slice |
36 | /// |
37 | /// # Errors |
38 | /// |
39 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
40 | /// provided slice is not UTF-8. |
41 | /// |
42 | /// # Examples |
43 | /// |
44 | /// Basic usage: |
45 | /// |
46 | /// ``` |
47 | /// use std::str; |
48 | /// |
49 | /// // some bytes, in a vector |
50 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
51 | /// |
52 | /// // We can use the ? (try) operator to check if the bytes are valid |
53 | /// let sparkle_heart = str::from_utf8(&sparkle_heart)?; |
54 | /// |
55 | /// assert_eq!("💖" , sparkle_heart); |
56 | /// # Ok::<_, str::Utf8Error>(()) |
57 | /// ``` |
58 | /// |
59 | /// Incorrect bytes: |
60 | /// |
61 | /// ``` |
62 | /// use std::str; |
63 | /// |
64 | /// // some invalid bytes, in a vector |
65 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
66 | /// |
67 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); |
68 | /// ``` |
69 | /// |
70 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
71 | /// errors that can be returned. |
72 | /// |
73 | /// A "stack allocated string": |
74 | /// |
75 | /// ``` |
76 | /// use std::str; |
77 | /// |
78 | /// // some bytes, in a stack-allocated array |
79 | /// let sparkle_heart = [240, 159, 146, 150]; |
80 | /// |
81 | /// // We know these bytes are valid, so just use `unwrap()`. |
82 | /// let sparkle_heart: &str = str::from_utf8(&sparkle_heart).unwrap(); |
83 | /// |
84 | /// assert_eq!("💖" , sparkle_heart); |
85 | /// ``` |
86 | #[stable (feature = "rust1" , since = "1.0.0" )] |
87 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
88 | #[rustc_diagnostic_item = "str_from_utf8" ] |
89 | pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
90 | // FIXME(const-hack): This should use `?` again, once it's `const` |
91 | match run_utf8_validation(v) { |
92 | Ok(_) => { |
93 | // SAFETY: validation succeeded. |
94 | Ok(unsafe { from_utf8_unchecked(v) }) |
95 | } |
96 | Err(err: Utf8Error) => Err(err), |
97 | } |
98 | } |
99 | |
100 | /// Converts a mutable slice of bytes to a mutable string slice. |
101 | /// |
102 | /// This is an alias to [`str::from_utf8_mut`]. |
103 | /// |
104 | /// # Examples |
105 | /// |
106 | /// Basic usage: |
107 | /// |
108 | /// ``` |
109 | /// use std::str; |
110 | /// |
111 | /// // "Hello, Rust!" as a mutable vector |
112 | /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; |
113 | /// |
114 | /// // As we know these bytes are valid, we can use `unwrap()` |
115 | /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); |
116 | /// |
117 | /// assert_eq!("Hello, Rust!" , outstr); |
118 | /// ``` |
119 | /// |
120 | /// Incorrect bytes: |
121 | /// |
122 | /// ``` |
123 | /// use std::str; |
124 | /// |
125 | /// // Some invalid bytes in a mutable vector |
126 | /// let mut invalid = vec![128, 223]; |
127 | /// |
128 | /// assert!(str::from_utf8_mut(&mut invalid).is_err()); |
129 | /// ``` |
130 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
131 | /// errors that can be returned. |
132 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
133 | #[rustc_const_stable (feature = "const_str_from_utf8" , since = "1.87.0" )] |
134 | #[rustc_diagnostic_item = "str_from_utf8_mut" ] |
135 | pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { |
136 | // FIXME(const-hack): This should use `?` again, once it's `const` |
137 | match run_utf8_validation(v) { |
138 | Ok(_) => { |
139 | // SAFETY: validation succeeded. |
140 | Ok(unsafe { from_utf8_unchecked_mut(v) }) |
141 | } |
142 | Err(err: Utf8Error) => Err(err), |
143 | } |
144 | } |
145 | |
146 | /// Converts a slice of bytes to a string slice without checking |
147 | /// that the string contains valid UTF-8. |
148 | /// |
149 | /// This is an alias to [`str::from_utf8_unchecked`]. |
150 | /// |
151 | /// See the safe version, [`from_utf8`], for more information. |
152 | /// |
153 | /// # Safety |
154 | /// |
155 | /// The bytes passed in must be valid UTF-8. |
156 | /// |
157 | /// # Examples |
158 | /// |
159 | /// Basic usage: |
160 | /// |
161 | /// ``` |
162 | /// use std::str; |
163 | /// |
164 | /// // some bytes, in a vector |
165 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
166 | /// |
167 | /// let sparkle_heart = unsafe { |
168 | /// str::from_utf8_unchecked(&sparkle_heart) |
169 | /// }; |
170 | /// |
171 | /// assert_eq!("💖" , sparkle_heart); |
172 | /// ``` |
173 | #[inline ] |
174 | #[must_use ] |
175 | #[stable (feature = "rust1" , since = "1.0.0" )] |
176 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked" , since = "1.55.0" )] |
177 | #[rustc_diagnostic_item = "str_from_utf8_unchecked" ] |
178 | pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
179 | // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. |
180 | // Also relies on `&str` and `&[u8]` having the same layout. |
181 | unsafe { mem::transmute(src:v) } |
182 | } |
183 | |
184 | /// Converts a slice of bytes to a string slice without checking |
185 | /// that the string contains valid UTF-8; mutable version. |
186 | /// |
187 | /// This is an alias to [`str::from_utf8_unchecked_mut`]. |
188 | /// |
189 | /// See the immutable version, [`from_utf8_unchecked()`] for documentation and safety requirements. |
190 | /// |
191 | /// # Examples |
192 | /// |
193 | /// Basic usage: |
194 | /// |
195 | /// ``` |
196 | /// use std::str; |
197 | /// |
198 | /// let mut heart = vec![240, 159, 146, 150]; |
199 | /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; |
200 | /// |
201 | /// assert_eq!("💖" , heart); |
202 | /// ``` |
203 | #[inline ] |
204 | #[must_use ] |
205 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
206 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked_mut" , since = "1.83.0" )] |
207 | #[rustc_diagnostic_item = "str_from_utf8_unchecked_mut" ] |
208 | pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { |
209 | // SAFETY: the caller must guarantee that the bytes `v` |
210 | // are valid UTF-8, thus the cast to `*mut str` is safe. |
211 | // Also, the pointer dereference is safe because that pointer |
212 | // comes from a reference which is guaranteed to be valid for writes. |
213 | unsafe { &mut *(v as *mut [u8] as *mut str) } |
214 | } |
215 | |
216 | /// Creates a `&str` from a pointer and a length. |
217 | /// |
218 | /// The pointed-to bytes must be valid UTF-8. |
219 | /// If this might not be the case, use `str::from_utf8(slice::from_raw_parts(ptr, len))`, |
220 | /// which will return an `Err` if the data isn't valid UTF-8. |
221 | /// |
222 | /// This function is the `str` equivalent of [`slice::from_raw_parts`](crate::slice::from_raw_parts). |
223 | /// See that function's documentation for safety concerns and examples. |
224 | /// |
225 | /// The mutable version of this function is [`from_raw_parts_mut`]. |
226 | #[inline ] |
227 | #[must_use ] |
228 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
229 | pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str { |
230 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts`. |
231 | unsafe { &*ptr::from_raw_parts(data_pointer:ptr, metadata:len) } |
232 | } |
233 | |
234 | /// Creates a `&mut str` from a pointer and a length. |
235 | /// |
236 | /// The pointed-to bytes must be valid UTF-8. |
237 | /// If this might not be the case, use `str::from_utf8_mut(slice::from_raw_parts_mut(ptr, len))`, |
238 | /// which will return an `Err` if the data isn't valid UTF-8. |
239 | /// |
240 | /// This function is the `str` equivalent of [`slice::from_raw_parts_mut`](crate::slice::from_raw_parts_mut). |
241 | /// See that function's documentation for safety concerns and examples. |
242 | /// |
243 | /// The immutable version of this function is [`from_raw_parts`]. |
244 | #[inline ] |
245 | #[must_use ] |
246 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
247 | pub const unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut str { |
248 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts_mut`. |
249 | unsafe { &mut *ptr::from_raw_parts_mut(data_pointer:ptr, metadata:len) } |
250 | } |
251 | |