// utf8.rs source code [crates/jiff/src/shared/util/utf8.rs]

/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the
/// given byte slice, then the first byte is returned instead (as the
/// `Err` variant).
///
/// This returns `None` if and only if `bytes` is empty.
///
/// This never panics: every slice access is checked and no `unwrap` is
/// used.
///
/// WARNING: This is not designed for performance. If you're looking for
/// a fast UTF-8 decoder, this is not it. If you feel like you need one in
/// this crate, then please file an issue and discuss your use case.
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    /// Given a UTF-8 leading byte, this returns the total number of code
    /// units in the following encoded codepoint.
    ///
    /// If the given byte is not a valid UTF-8 leading byte, then this
    /// returns `None`.
    ///
    /// This only classifies the bit pattern of the leading byte. Bytes
    /// such as `0xC0`, `0xC1` and `0xF5..=0xF7` are given a length here
    /// even though they can never begin well-formed UTF-8; the full
    /// validation performed by the caller rejects them.
    fn utf8_len(byte: u8) -> Option<usize> {
        match byte {
            // 0xxx_xxxx: ASCII, a single code unit.
            0x00..=0x7F => Some(1),
            // 10xx_xxxx: a continuation byte can never lead a sequence.
            0x80..=0xBF => None,
            // 110x_xxxx
            0xC0..=0xDF => Some(2),
            // 1110_xxxx
            0xE0..=0xEF => Some(3),
            // 1111_0xxx
            0xF0..=0xF7 => Some(4),
            // 1111_1xxx: not a leading byte in UTF-8.
            0xF8..=0xFF => None,
        }
    }

    // An empty slice is the only case that produces `None`.
    let first = *bytes.first()?;
    let len = match utf8_len(first) {
        None => return Some(Err(first)),
        // ASCII fast path: the byte is the codepoint.
        Some(1) => return Some(Ok(char::from(first))),
        Some(len) => len,
    };
    // `get` yields `None` when the slice is too short to hold the whole
    // sequence, and `from_utf8` rejects malformed, overlong, surrogate and
    // out-of-range encodings. Any failure reports the first byte.
    let decoded = bytes
        .get(..len)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|s| s.chars().next());
    Some(decoded.ok_or(first))
}