1 | // Copyright 2013-2014 The Rust Project Developers. |
2 | // Copyright 2018 The Uuid Project Developers. |
3 | // |
4 | // See the COPYRIGHT file at the top-level directory of this distribution. |
5 | // |
6 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
7 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
8 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
9 | // option. This file may not be copied, modified, or distributed |
10 | // except according to those terms. |
11 | |
12 | //! [`Uuid`] parsing constructs and utilities. |
13 | //! |
14 | //! [`Uuid`]: ../struct.Uuid.html |
15 | |
16 | use crate::{ |
17 | error::*, |
18 | std::{convert::TryFrom, str}, |
19 | Uuid, |
20 | }; |
21 | |
22 | impl str::FromStr for Uuid { |
23 | type Err = Error; |
24 | |
25 | fn from_str(uuid_str: &str) -> Result<Self, Self::Err> { |
26 | Uuid::parse_str(input:uuid_str) |
27 | } |
28 | } |
29 | |
30 | impl TryFrom<&'_ str> for Uuid { |
31 | type Error = Error; |
32 | |
33 | fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> { |
34 | Uuid::parse_str(input:uuid_str) |
35 | } |
36 | } |
37 | |
38 | impl Uuid { |
39 | /// Parses a `Uuid` from a string of hexadecimal digits with optional |
40 | /// hyphens. |
41 | /// |
42 | /// Any of the formats generated by this module (simple, hyphenated, urn, |
43 | /// Microsoft GUID) are supported by this parsing function. |
44 | /// |
45 | /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics. |
46 | /// This method will be eventually deprecated in favor of `try_parse`. |
47 | /// |
48 | /// # Examples |
49 | /// |
50 | /// Parse a hyphenated UUID: |
51 | /// |
52 | /// ``` |
53 | /// # use uuid::{Uuid, Version, Variant}; |
54 | /// # fn main() -> Result<(), uuid::Error> { |
55 | /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000" )?; |
56 | /// |
57 | /// assert_eq!(Some(Version::Random), uuid.get_version()); |
58 | /// assert_eq!(Variant::RFC4122, uuid.get_variant()); |
59 | /// # Ok(()) |
60 | /// # } |
61 | /// ``` |
62 | /// |
63 | /// [`try_parse`]: #method.try_parse |
64 | pub fn parse_str(input: &str) -> Result<Uuid, Error> { |
65 | try_parse(input.as_bytes()) |
66 | .map(Uuid::from_bytes) |
67 | .map_err(InvalidUuid::into_err) |
68 | } |
69 | |
70 | /// Parses a `Uuid` from a string of hexadecimal digits with optional |
71 | /// hyphens. |
72 | /// |
73 | /// This function is similar to [`parse_str`], in fact `parse_str` shares |
74 | /// the same underlying parser. The difference is that if `try_parse` |
75 | /// fails, it won't generate very useful error messages. The `parse_str` |
76 | /// function will eventually be deprecated in favor of `try_parse`. |
77 | /// |
78 | /// To parse a UUID from a byte stream instead of a UTF8 string, see |
79 | /// [`try_parse_ascii`]. |
80 | /// |
81 | /// # Examples |
82 | /// |
83 | /// Parse a hyphenated UUID: |
84 | /// |
85 | /// ``` |
86 | /// # use uuid::{Uuid, Version, Variant}; |
87 | /// # fn main() -> Result<(), uuid::Error> { |
88 | /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000" )?; |
89 | /// |
90 | /// assert_eq!(Some(Version::Random), uuid.get_version()); |
91 | /// assert_eq!(Variant::RFC4122, uuid.get_variant()); |
92 | /// # Ok(()) |
93 | /// # } |
94 | /// ``` |
95 | /// |
96 | /// [`parse_str`]: #method.parse_str |
97 | /// [`try_parse_ascii`]: #method.try_parse_ascii |
98 | pub const fn try_parse(input: &str) -> Result<Uuid, Error> { |
99 | Self::try_parse_ascii(input.as_bytes()) |
100 | } |
101 | |
102 | /// Parses a `Uuid` from a string of hexadecimal digits with optional |
103 | /// hyphens. |
104 | /// |
105 | /// The input is expected to be a string of ASCII characters. This method |
106 | /// can be more convenient than [`try_parse`] if the UUID is being |
107 | /// parsed from a byte stream instead of from a UTF8 string. |
108 | /// |
109 | /// # Examples |
110 | /// |
111 | /// Parse a hyphenated UUID: |
112 | /// |
113 | /// ``` |
114 | /// # use uuid::{Uuid, Version, Variant}; |
115 | /// # fn main() -> Result<(), uuid::Error> { |
116 | /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000" )?; |
117 | /// |
118 | /// assert_eq!(Some(Version::Random), uuid.get_version()); |
119 | /// assert_eq!(Variant::RFC4122, uuid.get_variant()); |
120 | /// # Ok(()) |
121 | /// # } |
122 | /// ``` |
123 | /// |
124 | /// [`try_parse`]: #method.try_parse |
125 | pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> { |
126 | match try_parse(input) { |
127 | Ok(bytes) => Ok(Uuid::from_bytes(bytes)), |
128 | // If parsing fails then we don't know exactly what went wrong |
129 | // In this case, we just return a generic error |
130 | Err(_) => Err(Error(ErrorKind::Other)), |
131 | } |
132 | } |
133 | } |
134 | |
135 | const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> { |
136 | match (input.len(), input) { |
137 | // Inputs of 32 bytes must be a non-hyphenated UUID |
138 | (32, s: &[u8]) => parse_simple(s), |
139 | // Hyphenated UUIDs may be wrapped in various ways: |
140 | // - `{UUID}` for braced UUIDs |
141 | // - `urn:uuid:UUID` for URNs |
142 | // - `UUID` for a regular hyphenated UUID |
143 | (36, s: &[u8]) |
144 | | (38, [b'{' , s: &[u8] @ .., b'}' ]) |
145 | | (45, [b'u' , b'r' , b'n' , b':' , b'u' , b'u' , b'i' , b'd' , b':' , s: &[u8] @ ..]) => { |
146 | parse_hyphenated(s) |
147 | } |
148 | // Any other shaped input is immediately invalid |
149 | _ => Err(InvalidUuid(input)), |
150 | } |
151 | } |
152 | |
153 | #[inline ] |
154 | #[allow (dead_code)] |
155 | pub(crate) const fn parse_braced(input: &[u8]) -> Result<[u8; 16], InvalidUuid> { |
156 | if let (38, [b'{' , s: &[u8] @ .., b'}' ]) = (input.len(), input) { |
157 | parse_hyphenated(s) |
158 | } else { |
159 | Err(InvalidUuid(input)) |
160 | } |
161 | } |
162 | |
163 | #[inline ] |
164 | #[allow (dead_code)] |
165 | pub(crate) const fn parse_urn(input: &[u8]) -> Result<[u8; 16], InvalidUuid> { |
166 | if let (45, [b'u' , b'r' , b'n' , b':' , b'u' , b'u' , b'i' , b'd' , b':' , s: &[u8] @ ..]) = |
167 | (input.len(), input) |
168 | { |
169 | parse_hyphenated(s) |
170 | } else { |
171 | Err(InvalidUuid(input)) |
172 | } |
173 | } |
174 | |
175 | #[inline ] |
176 | pub(crate) const fn parse_simple(s: &[u8]) -> Result<[u8; 16], InvalidUuid> { |
177 | // This length check here removes all other bounds |
178 | // checks in this function |
179 | if s.len() != 32 { |
180 | return Err(InvalidUuid(s)); |
181 | } |
182 | |
183 | let mut buf: [u8; 16] = [0; 16]; |
184 | let mut i = 0; |
185 | |
186 | while i < 16 { |
187 | // Convert a two-char hex value (like `A8`) |
188 | // into a byte (like `10101000`) |
189 | let h1 = HEX_TABLE[s[i * 2] as usize]; |
190 | let h2 = HEX_TABLE[s[i * 2 + 1] as usize]; |
191 | |
192 | // We use `0xff` as a sentinel value to indicate |
193 | // an invalid hex character sequence (like the letter `G`) |
194 | if h1 | h2 == 0xff { |
195 | return Err(InvalidUuid(s)); |
196 | } |
197 | |
198 | // The upper nibble needs to be shifted into position |
199 | // to produce the final byte value |
200 | buf[i] = SHL4_TABLE[h1 as usize] | h2; |
201 | i += 1; |
202 | } |
203 | |
204 | Ok(buf) |
205 | } |
206 | |
207 | #[inline ] |
208 | const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], InvalidUuid> { |
209 | // This length check here removes all other bounds |
210 | // checks in this function |
211 | if s.len() != 36 { |
212 | return Err(InvalidUuid(s)); |
213 | } |
214 | |
215 | // We look at two hex-encoded values (4 chars) at a time because |
216 | // that's the size of the smallest group in a hyphenated UUID. |
217 | // The indexes we're interested in are: |
218 | // |
219 | // uuid : 936da01f-9abd-4d9d-80c7-02af85c822a8 |
220 | // | | || || || || | | |
221 | // hyphens : | | 8| 13| 18| 23| | | |
222 | // positions: 0 4 9 14 19 24 28 32 |
223 | |
224 | // First, ensure the hyphens appear in the right places |
225 | match [s[8], s[13], s[18], s[23]] { |
226 | [b'-' , b'-' , b'-' , b'-' ] => {} |
227 | _ => return Err(InvalidUuid(s)), |
228 | } |
229 | |
230 | let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32]; |
231 | let mut buf: [u8; 16] = [0; 16]; |
232 | let mut j = 0; |
233 | |
234 | while j < 8 { |
235 | let i = positions[j]; |
236 | |
237 | // The decoding here is the same as the simple case |
238 | // We're just dealing with two values instead of one |
239 | let h1 = HEX_TABLE[s[i as usize] as usize]; |
240 | let h2 = HEX_TABLE[s[(i + 1) as usize] as usize]; |
241 | let h3 = HEX_TABLE[s[(i + 2) as usize] as usize]; |
242 | let h4 = HEX_TABLE[s[(i + 3) as usize] as usize]; |
243 | |
244 | if h1 | h2 | h3 | h4 == 0xff { |
245 | return Err(InvalidUuid(s)); |
246 | } |
247 | |
248 | buf[j * 2] = SHL4_TABLE[h1 as usize] | h2; |
249 | buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4; |
250 | j += 1; |
251 | } |
252 | |
253 | Ok(buf) |
254 | } |
255 | |
/// Lookup table from an ASCII byte to the value of the hex digit it encodes.
///
/// `0`-`9` map to 0-9 and `a`-`f` / `A`-`F` map to 10-15. Every other byte
/// maps to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Begin with every entry marked invalid, then fill in the hex digits.
    let mut table: [u8; 256] = [0xff; 256];

    let mut digit: u8 = 0;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // Lowercase and uppercase letters decode to the same values.
    let mut letter: u8 = 0;
    while letter < 6 {
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
275 | |
/// Lookup table from a byte to that byte shifted left by four bits, with
/// the bits shifted past the top discarded.
///
/// The parsers in this file use it to move a decoded hex digit into the
/// upper nibble of an output byte.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table: [u8; 256] = [0; 256];
    let mut idx: usize = 0;

    while idx < 256 {
        // `idx` never exceeds 255 here, so the cast to `u8` is lossless.
        table[idx] = (idx as u8) << 4;
        idx += 1;
    }

    table
};
290 | |
/// Unit tests for the parsing entry points: accepted formats, the detailed
/// errors reported by `Uuid::parse_str`, and round-trips through each
/// formatter.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{std::string::ToString, tests::new};

    #[test]
    fn test_parse_uuid_v4_valid() {
        // The same UUID must parse identically from every supported format.
        let from_hyphenated = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_simple = Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let from_urn = Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_guid = Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(from_hyphenated, from_simple);
        assert_eq!(from_hyphenated, from_urn);
        assert_eq!(from_hyphenated, from_guid);

        assert!(Uuid::parse_str("00000000000000000000000000000000").is_ok());
        assert!(Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4").is_ok());
        assert!(Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("01020304-1112-2122-3132-414243444546").is_ok());
        assert!(Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("{6d93bade-bd9f-4e13-8914-9474e1e3567b}").is_ok());

        // Nil
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    #[test]
    fn test_parse_uuid_v4_invalid() {
        // Invalid
        assert_eq!(
            Uuid::parse_str(""),
            Err(Error(ErrorKind::SimpleLength { len: 0 }))
        );

        assert_eq!(
            Uuid::parse_str("!"),
            Err(Error(ErrorKind::Char {
                character: '!',
                index: 1,
            }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45"),
            Err(Error(ErrorKind::GroupLength {
                group: 4,
                len: 13,
                index: 25,
            }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BBF-329BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 3,
                len: 3,
                index: 20,
            }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4"),
            Err(Error(ErrorKind::Char {
                character: 'G',
                index: 21,
            }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 2 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 4 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::Char {
                character: 'X',
                index: 19,
            }))
        );

        assert_eq!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41"),
            Err(Error(ErrorKind::Char {
                character: '{',
                index: 1,
            }))
        );

        assert_eq!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 1,
                len: 3,
                index: 10,
            }))
        );

        assert_eq!(
            Uuid::parse_str("01020304-1112-2122-3132-41424344"),
            Err(Error(ErrorKind::GroupLength {
                group: 4,
                len: 8,
                index: 25,
            }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c"),
            Err(Error(ErrorKind::SimpleLength { len: 31 }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c88"),
            Err(Error(ErrorKind::SimpleLength { len: 33 }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0cg8"),
            Err(Error(ErrorKind::Char {
                character: 'g',
                index: 32,
            }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426%9247bb680e5fe0c8"),
            Err(Error(ErrorKind::Char {
                character: '%',
                index: 16,
            }))
        );

        assert_eq!(
            Uuid::parse_str("231231212212423424324323477343246663"),
            Err(Error(ErrorKind::SimpleLength { len: 36 }))
        );

        assert_eq!(
            Uuid::parse_str("{00000000000000000000000000000000}"),
            Err(Error(ErrorKind::GroupCount { count: 1 }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c"),
            Err(Error(ErrorKind::SimpleLength { len: 31 }))
        );

        assert_eq!(
            Uuid::parse_str("67e550X410b1426f9247bb680e5fe0cd"),
            Err(Error(ErrorKind::Char {
                character: 'X',
                index: 7,
            }))
        );

        assert_eq!(
            Uuid::parse_str("67e550-4105b1426f9247bb680e5fe0c"),
            Err(Error(ErrorKind::GroupCount { count: 2 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 3,
                len: 5,
                index: 20,
            }))
        );

        // A single multi-byte character: the reported `character` must be
        // the whole code point.
        assert_eq!(
            Uuid::parse_str("\u{bcf3c}"),
            Err(Error(ErrorKind::Char {
                character: '\u{bcf3c}',
                index: 1
            }))
        );
    }

    #[test]
    fn test_roundtrip_default() {
        let uuid_orig = new();
        let orig_str = uuid_orig.to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        let uuid_orig = new();
        let orig_str = uuid_orig.hyphenated().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_simple() {
        let uuid_orig = new();
        let orig_str = uuid_orig.simple().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_urn() {
        let uuid_orig = new();
        let orig_str = uuid_orig.urn().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_braced() {
        let uuid_orig = new();
        let orig_str = uuid_orig.braced().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_parse_urn() {
        let uuid_orig = new();
        let orig_str = uuid_orig.urn().to_string();
        let uuid_out = Uuid::from_bytes(parse_urn(orig_str.as_bytes()).unwrap());
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_parse_braced() {
        let uuid_orig = new();
        let orig_str = uuid_orig.braced().to_string();
        let uuid_out = Uuid::from_bytes(parse_braced(orig_str.as_bytes()).unwrap());
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // The input is exactly 36 bytes with hyphens in the right places, so
        // the rejection comes from the embedded NUL byte and not from the
        // length check.
        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8").is_err());
    }
}
563 | |