//! Ways to create a `str` from a byte slice.
2 | |
3 | use super::Utf8Error; |
4 | use super::validations::run_utf8_validation; |
5 | use crate::{mem, ptr}; |
6 | |
7 | /// Converts a slice of bytes to a string slice. |
8 | /// |
9 | /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice |
10 | /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between |
11 | /// the two. Not all byte slices are valid string slices, however: [`&str`] requires |
12 | /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid |
13 | /// UTF-8, and then does the conversion. |
14 | /// |
15 | /// [`&str`]: str |
16 | /// [byteslice]: slice |
17 | /// |
18 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to |
19 | /// incur the overhead of the validity check, there is an unsafe version of |
20 | /// this function, [`from_utf8_unchecked`], which has the same |
21 | /// behavior but skips the check. |
22 | /// |
23 | /// If you need a `String` instead of a `&str`, consider |
24 | /// [`String::from_utf8`][string]. |
25 | /// |
26 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
27 | /// |
28 | /// Because you can stack-allocate a `[u8; N]`, and you can take a |
29 | /// [`&[u8]`][byteslice] of it, this function is one way to have a |
30 | /// stack-allocated string. There is an example of this in the |
31 | /// examples section below. |
32 | /// |
33 | /// [byteslice]: slice |
34 | /// |
35 | /// # Errors |
36 | /// |
37 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
38 | /// provided slice is not UTF-8. |
39 | /// |
40 | /// # Examples |
41 | /// |
42 | /// Basic usage: |
43 | /// |
44 | /// ``` |
45 | /// use std::str; |
46 | /// |
47 | /// // some bytes, in a vector |
48 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
49 | /// |
50 | /// // We can use the ? (try) operator to check if the bytes are valid |
51 | /// let sparkle_heart = str::from_utf8(&sparkle_heart)?; |
52 | /// |
53 | /// assert_eq!("💖" , sparkle_heart); |
54 | /// # Ok::<_, str::Utf8Error>(()) |
55 | /// ``` |
56 | /// |
57 | /// Incorrect bytes: |
58 | /// |
59 | /// ``` |
60 | /// use std::str; |
61 | /// |
62 | /// // some invalid bytes, in a vector |
63 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
64 | /// |
65 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); |
66 | /// ``` |
67 | /// |
68 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
69 | /// errors that can be returned. |
70 | /// |
71 | /// A "stack allocated string": |
72 | /// |
73 | /// ``` |
74 | /// use std::str; |
75 | /// |
76 | /// // some bytes, in a stack-allocated array |
77 | /// let sparkle_heart = [240, 159, 146, 150]; |
78 | /// |
79 | /// // We know these bytes are valid, so just use `unwrap()`. |
80 | /// let sparkle_heart: &str = str::from_utf8(&sparkle_heart).unwrap(); |
81 | /// |
82 | /// assert_eq!("💖" , sparkle_heart); |
83 | /// ``` |
84 | #[stable (feature = "rust1" , since = "1.0.0" )] |
85 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
86 | #[rustc_diagnostic_item = "str_from_utf8" ] |
87 | pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
88 | // FIXME(const-hack): This should use `?` again, once it's `const` |
89 | match run_utf8_validation(v) { |
90 | Ok(_) => { |
91 | // SAFETY: validation succeeded. |
92 | Ok(unsafe { from_utf8_unchecked(v) }) |
93 | } |
94 | Err(err: Utf8Error) => Err(err), |
95 | } |
96 | } |
97 | |
98 | /// Converts a mutable slice of bytes to a mutable string slice. |
99 | /// |
100 | /// # Examples |
101 | /// |
102 | /// Basic usage: |
103 | /// |
104 | /// ``` |
105 | /// use std::str; |
106 | /// |
107 | /// // "Hello, Rust!" as a mutable vector |
108 | /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; |
109 | /// |
110 | /// // As we know these bytes are valid, we can use `unwrap()` |
111 | /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); |
112 | /// |
113 | /// assert_eq!("Hello, Rust!" , outstr); |
114 | /// ``` |
115 | /// |
116 | /// Incorrect bytes: |
117 | /// |
118 | /// ``` |
119 | /// use std::str; |
120 | /// |
121 | /// // Some invalid bytes in a mutable vector |
122 | /// let mut invalid = vec![128, 223]; |
123 | /// |
124 | /// assert!(str::from_utf8_mut(&mut invalid).is_err()); |
125 | /// ``` |
126 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
127 | /// errors that can be returned. |
128 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
129 | #[rustc_const_stable (feature = "const_str_from_utf8" , since = "1.87.0" )] |
130 | #[rustc_diagnostic_item = "str_from_utf8_mut" ] |
131 | pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { |
132 | // FIXME(const-hack): This should use `?` again, once it's `const` |
133 | match run_utf8_validation(v) { |
134 | Ok(_) => { |
135 | // SAFETY: validation succeeded. |
136 | Ok(unsafe { from_utf8_unchecked_mut(v) }) |
137 | } |
138 | Err(err: Utf8Error) => Err(err), |
139 | } |
140 | } |
141 | |
142 | /// Converts a slice of bytes to a string slice without checking |
143 | /// that the string contains valid UTF-8. |
144 | /// |
145 | /// See the safe version, [`from_utf8`], for more information. |
146 | /// |
147 | /// # Safety |
148 | /// |
149 | /// The bytes passed in must be valid UTF-8. |
150 | /// |
151 | /// # Examples |
152 | /// |
153 | /// Basic usage: |
154 | /// |
155 | /// ``` |
156 | /// use std::str; |
157 | /// |
158 | /// // some bytes, in a vector |
159 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
160 | /// |
161 | /// let sparkle_heart = unsafe { |
162 | /// str::from_utf8_unchecked(&sparkle_heart) |
163 | /// }; |
164 | /// |
165 | /// assert_eq!("💖" , sparkle_heart); |
166 | /// ``` |
167 | #[inline ] |
168 | #[must_use ] |
169 | #[stable (feature = "rust1" , since = "1.0.0" )] |
170 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked" , since = "1.55.0" )] |
171 | #[rustc_diagnostic_item = "str_from_utf8_unchecked" ] |
172 | pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
173 | // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. |
174 | // Also relies on `&str` and `&[u8]` having the same layout. |
175 | unsafe { mem::transmute(src:v) } |
176 | } |
177 | |
178 | /// Converts a slice of bytes to a string slice without checking |
179 | /// that the string contains valid UTF-8; mutable version. |
180 | /// |
181 | /// See the immutable version, [`from_utf8_unchecked()`] for more information. |
182 | /// |
183 | /// # Examples |
184 | /// |
185 | /// Basic usage: |
186 | /// |
187 | /// ``` |
188 | /// use std::str; |
189 | /// |
190 | /// let mut heart = vec![240, 159, 146, 150]; |
191 | /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; |
192 | /// |
193 | /// assert_eq!("💖" , heart); |
194 | /// ``` |
195 | #[inline ] |
196 | #[must_use ] |
197 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
198 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked_mut" , since = "1.83.0" )] |
199 | #[rustc_diagnostic_item = "str_from_utf8_unchecked_mut" ] |
200 | pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { |
201 | // SAFETY: the caller must guarantee that the bytes `v` |
202 | // are valid UTF-8, thus the cast to `*mut str` is safe. |
203 | // Also, the pointer dereference is safe because that pointer |
204 | // comes from a reference which is guaranteed to be valid for writes. |
205 | unsafe { &mut *(v as *mut [u8] as *mut str) } |
206 | } |
207 | |
208 | /// Creates a `&str` from a pointer and a length. |
209 | /// |
210 | /// The pointed-to bytes must be valid UTF-8. |
211 | /// If this might not be the case, use `str::from_utf8(slice::from_raw_parts(ptr, len))`, |
212 | /// which will return an `Err` if the data isn't valid UTF-8. |
213 | /// |
214 | /// This function is the `str` equivalent of [`slice::from_raw_parts`](crate::slice::from_raw_parts). |
215 | /// See that function's documentation for safety concerns and examples. |
216 | /// |
217 | /// The mutable version of this function is [`from_raw_parts_mut`]. |
218 | #[inline ] |
219 | #[must_use ] |
220 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
221 | pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str { |
222 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts`. |
223 | unsafe { &*ptr::from_raw_parts(data_pointer:ptr, metadata:len) } |
224 | } |
225 | |
226 | /// Creates a `&mut str` from a pointer and a length. |
227 | /// |
228 | /// The pointed-to bytes must be valid UTF-8. |
229 | /// If this might not be the case, use `str::from_utf8_mut(slice::from_raw_parts_mut(ptr, len))`, |
230 | /// which will return an `Err` if the data isn't valid UTF-8. |
231 | /// |
232 | /// This function is the `str` equivalent of [`slice::from_raw_parts_mut`](crate::slice::from_raw_parts_mut). |
233 | /// See that function's documentation for safety concerns and examples. |
234 | /// |
235 | /// The immutable version of this function is [`from_raw_parts`]. |
236 | #[inline ] |
237 | #[must_use ] |
238 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
239 | pub const unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut str { |
240 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts_mut`. |
241 | unsafe { &mut *ptr::from_raw_parts_mut(data_pointer:ptr, metadata:len) } |
242 | } |
243 | |