//! Ways to create a `str` from a byte slice.
2 | |
3 | use crate::{mem, ptr}; |
4 | |
5 | use super::validations::run_utf8_validation; |
6 | use super::Utf8Error; |
7 | |
8 | /// Converts a slice of bytes to a string slice. |
9 | /// |
10 | /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice |
11 | /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between |
12 | /// the two. Not all byte slices are valid string slices, however: [`&str`] requires |
13 | /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid |
14 | /// UTF-8, and then does the conversion. |
15 | /// |
16 | /// [`&str`]: str |
17 | /// [byteslice]: slice |
18 | /// |
19 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to |
20 | /// incur the overhead of the validity check, there is an unsafe version of |
21 | /// this function, [`from_utf8_unchecked`], which has the same |
22 | /// behavior but skips the check. |
23 | /// |
24 | /// If you need a `String` instead of a `&str`, consider |
25 | /// [`String::from_utf8`][string]. |
26 | /// |
27 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
28 | /// |
29 | /// Because you can stack-allocate a `[u8; N]`, and you can take a |
30 | /// [`&[u8]`][byteslice] of it, this function is one way to have a |
31 | /// stack-allocated string. There is an example of this in the |
32 | /// examples section below. |
33 | /// |
34 | /// [byteslice]: slice |
35 | /// |
36 | /// # Errors |
37 | /// |
38 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
39 | /// provided slice is not UTF-8. |
40 | /// |
41 | /// # Examples |
42 | /// |
43 | /// Basic usage: |
44 | /// |
45 | /// ``` |
46 | /// use std::str; |
47 | /// |
48 | /// // some bytes, in a vector |
49 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
50 | /// |
51 | /// // We know these bytes are valid, so just use `unwrap()`. |
52 | /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); |
53 | /// |
54 | /// assert_eq!("💖" , sparkle_heart); |
55 | /// ``` |
56 | /// |
57 | /// Incorrect bytes: |
58 | /// |
59 | /// ``` |
60 | /// use std::str; |
61 | /// |
62 | /// // some invalid bytes, in a vector |
63 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
64 | /// |
65 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); |
66 | /// ``` |
67 | /// |
68 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
69 | /// errors that can be returned. |
70 | /// |
71 | /// A "stack allocated string": |
72 | /// |
73 | /// ``` |
74 | /// use std::str; |
75 | /// |
76 | /// // some bytes, in a stack-allocated array |
77 | /// let sparkle_heart = [240, 159, 146, 150]; |
78 | /// |
79 | /// // We know these bytes are valid, so just use `unwrap()`. |
80 | /// let sparkle_heart: &str = str::from_utf8(&sparkle_heart).unwrap(); |
81 | /// |
82 | /// assert_eq!("💖" , sparkle_heart); |
83 | /// ``` |
84 | #[stable (feature = "rust1" , since = "1.0.0" )] |
85 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
86 | #[rustc_allow_const_fn_unstable (str_internals)] |
87 | #[rustc_diagnostic_item = "str_from_utf8" ] |
88 | pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
89 | // FIXME: This should use `?` again, once it's `const` |
90 | match run_utf8_validation(v) { |
91 | Ok(_) => { |
92 | // SAFETY: validation succeeded. |
93 | Ok(unsafe { from_utf8_unchecked(v) }) |
94 | } |
95 | Err(err: Utf8Error) => Err(err), |
96 | } |
97 | } |
98 | |
99 | /// Converts a mutable slice of bytes to a mutable string slice. |
100 | /// |
101 | /// # Examples |
102 | /// |
103 | /// Basic usage: |
104 | /// |
105 | /// ``` |
106 | /// use std::str; |
107 | /// |
108 | /// // "Hello, Rust!" as a mutable vector |
109 | /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; |
110 | /// |
111 | /// // As we know these bytes are valid, we can use `unwrap()` |
112 | /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); |
113 | /// |
114 | /// assert_eq!("Hello, Rust!" , outstr); |
115 | /// ``` |
116 | /// |
117 | /// Incorrect bytes: |
118 | /// |
119 | /// ``` |
120 | /// use std::str; |
121 | /// |
122 | /// // Some invalid bytes in a mutable vector |
123 | /// let mut invalid = vec![128, 223]; |
124 | /// |
125 | /// assert!(str::from_utf8_mut(&mut invalid).is_err()); |
126 | /// ``` |
127 | /// See the docs for [`Utf8Error`] for more details on the kinds of |
128 | /// errors that can be returned. |
129 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
130 | #[rustc_const_unstable (feature = "const_str_from_utf8" , issue = "91006" )] |
131 | #[rustc_diagnostic_item = "str_from_utf8_mut" ] |
132 | pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { |
133 | // This should use `?` again, once it's `const` |
134 | match run_utf8_validation(v) { |
135 | Ok(_) => { |
136 | // SAFETY: validation succeeded. |
137 | Ok(unsafe { from_utf8_unchecked_mut(v) }) |
138 | } |
139 | Err(err: Utf8Error) => Err(err), |
140 | } |
141 | } |
142 | |
143 | /// Converts a slice of bytes to a string slice without checking |
144 | /// that the string contains valid UTF-8. |
145 | /// |
146 | /// See the safe version, [`from_utf8`], for more information. |
147 | /// |
148 | /// # Safety |
149 | /// |
150 | /// The bytes passed in must be valid UTF-8. |
151 | /// |
152 | /// # Examples |
153 | /// |
154 | /// Basic usage: |
155 | /// |
156 | /// ``` |
157 | /// use std::str; |
158 | /// |
159 | /// // some bytes, in a vector |
160 | /// let sparkle_heart = vec![240, 159, 146, 150]; |
161 | /// |
162 | /// let sparkle_heart = unsafe { |
163 | /// str::from_utf8_unchecked(&sparkle_heart) |
164 | /// }; |
165 | /// |
166 | /// assert_eq!("💖" , sparkle_heart); |
167 | /// ``` |
168 | #[inline ] |
169 | #[must_use ] |
170 | #[stable (feature = "rust1" , since = "1.0.0" )] |
171 | #[rustc_const_stable (feature = "const_str_from_utf8_unchecked" , since = "1.55.0" )] |
172 | #[rustc_diagnostic_item = "str_from_utf8_unchecked" ] |
173 | pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
174 | // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. |
175 | // Also relies on `&str` and `&[u8]` having the same layout. |
176 | unsafe { mem::transmute(src:v) } |
177 | } |
178 | |
179 | /// Converts a slice of bytes to a string slice without checking |
180 | /// that the string contains valid UTF-8; mutable version. |
181 | /// |
182 | /// See the immutable version, [`from_utf8_unchecked()`] for more information. |
183 | /// |
184 | /// # Examples |
185 | /// |
186 | /// Basic usage: |
187 | /// |
188 | /// ``` |
189 | /// use std::str; |
190 | /// |
191 | /// let mut heart = vec![240, 159, 146, 150]; |
192 | /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; |
193 | /// |
194 | /// assert_eq!("💖" , heart); |
195 | /// ``` |
196 | #[inline ] |
197 | #[must_use ] |
198 | #[stable (feature = "str_mut_extras" , since = "1.20.0" )] |
199 | #[rustc_const_unstable (feature = "const_str_from_utf8_unchecked_mut" , issue = "91005" )] |
200 | #[rustc_diagnostic_item = "str_from_utf8_unchecked_mut" ] |
201 | pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { |
202 | // SAFETY: the caller must guarantee that the bytes `v` |
203 | // are valid UTF-8, thus the cast to `*mut str` is safe. |
204 | // Also, the pointer dereference is safe because that pointer |
205 | // comes from a reference which is guaranteed to be valid for writes. |
206 | unsafe { &mut *(v as *mut [u8] as *mut str) } |
207 | } |
208 | |
209 | /// Creates an `&str` from a pointer and a length. |
210 | /// |
211 | /// The pointed-to bytes must be valid UTF-8. |
212 | /// If this might not be the case, use `str::from_utf8(slice::from_raw_parts(ptr, len))`, |
213 | /// which will return an `Err` if the data isn't valid UTF-8. |
214 | /// |
215 | /// This function is the `str` equivalent of [`slice::from_raw_parts`](crate::slice::from_raw_parts). |
216 | /// See that function's documentation for safety concerns and examples. |
217 | /// |
218 | /// The mutable version of this function is [`from_raw_parts_mut`]. |
219 | #[inline ] |
220 | #[must_use ] |
221 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
222 | #[rustc_const_unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
223 | pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str { |
224 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts`. |
225 | unsafe { &*ptr::from_raw_parts(data_pointer:ptr.cast(), metadata:len) } |
226 | } |
227 | |
228 | /// Creates an `&mut str` from a pointer and a length. |
229 | /// |
230 | /// The pointed-to bytes must be valid UTF-8. |
231 | /// If this might not be the case, use `str::from_utf8_mut(slice::from_raw_parts_mut(ptr, len))`, |
232 | /// which will return an `Err` if the data isn't valid UTF-8. |
233 | /// |
234 | /// This function is the `str` equivalent of [`slice::from_raw_parts_mut`](crate::slice::from_raw_parts_mut). |
235 | /// See that function's documentation for safety concerns and examples. |
236 | /// |
237 | /// The immutable version of this function is [`from_raw_parts`]. |
238 | #[inline ] |
239 | #[must_use ] |
240 | #[unstable (feature = "str_from_raw_parts" , issue = "119206" )] |
241 | #[rustc_const_unstable (feature = "const_str_from_raw_parts_mut" , issue = "119206" )] |
242 | pub const unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a str { |
243 | // SAFETY: the caller must uphold the safety contract for `from_raw_parts_mut`. |
244 | unsafe { &mut *ptr::from_raw_parts_mut(data_pointer:ptr.cast(), metadata:len) } |
245 | } |
246 | |