// char.rs source code [crates/arrayvec/src/char.rs]

1	// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
2	// file at the top-level directory of this distribution and at
3	// http://rust-lang.org/COPYRIGHT.
4	//
5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8	// option. This file may not be copied, modified, or distributed
9	// except according to those terms.
10	//
// Original authors: alexcrichton, bluss
12
// UTF-8 ranges and tags for encoding characters

/// Tag bits of a continuation byte (`10xxxxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Tag bits of the leading byte of a two-byte sequence (`110xxxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Tag bits of the leading byte of a three-byte sequence (`1110xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Tag bits of the leading byte of a four-byte sequence (`11110xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Exclusive upper bound of the code points that are encoded in one byte.
const MAX_ONE_B: u32 = 0x80;
/// Exclusive upper bound of the code points that are encoded in two bytes.
const MAX_TWO_B: u32 = 0x800;
/// Exclusive upper bound of the code points that are encoded in three bytes.
const MAX_THREE_B: u32 = 0x10000;
21
/// Error returned by [`encode_utf8`] when the destination buffer is too short
/// to hold the UTF-8 encoding of the char.
///
/// The type carries no data; it only signals that nothing could be encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EncodeUtf8Error;
24
25	/// Encode a char into buf using UTF-8.
26	///
27	/// On success, return the byte length of the encoding (1, 2, 3 or 4).<br>
28	/// On error, return `EncodeUtf8Error` if the buffer was too short for the char.
29	///
30	/// Safety: `ptr` must be writable for `len` bytes.
31	#[inline]
32	pub unsafe fn encode_utf8(ch: char, ptr: *mut u8, len: usize) -> Result<usize, EncodeUtf8Error>
33	{
34	let code: u32 = ch as u32;
35	if code < MAX_ONE_B && len >= `1` {
36	ptr.add(`0`).write(val:code as u8);
37	return Ok(`1`);
38	} else if code < MAX_TWO_B && len >= `2` {
39	ptr.add(`0`).write((code >> `6` & `0x1F`) as u8 \| TAG_TWO_B);
40	ptr.add(`1`).write((code & `0x3F`) as u8 \| TAG_CONT);
41	return Ok(`2`);
42	} else if code < MAX_THREE_B && len >= `3` {
43	ptr.add(`0`).write((code >> `12` & `0x0F`) as u8 \| TAG_THREE_B);
44	ptr.add(`1`).write((code >> `6` & `0x3F`) as u8 \| TAG_CONT);
45	ptr.add(`2`).write((code & `0x3F`) as u8 \| TAG_CONT);
46	return Ok(`3`);
47	} else if len >= `4` {
48	ptr.add(`0`).write((code >> `18` & `0x07`) as u8 \| TAG_FOUR_B);
49	ptr.add(`1`).write((code >> `12` & `0x3F`) as u8 \| TAG_CONT);
50	ptr.add(`2`).write((code >> `6` & `0x3F`) as u8 \| TAG_CONT);
51	ptr.add(`3`).write((code & `0x3F`) as u8 \| TAG_CONT);
52	return Ok(`4`);
53	};
54	Err(EncodeUtf8Error)
55	}
56
57
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_encode_utf8() {
    // Test that all codepoints are encoded correctly
    let mut data = [0u8; 16];
    for codepoint in 0..=(std::char::MAX as u32) {
        // `from_u32` returns `None` for values that are not valid chars;
        // those are skipped.
        if let Some(ch) = std::char::from_u32(codepoint) {
            // Start every round from an all-zero buffer so that the bytes
            // after the encoding decode as NULs and cannot leak from a
            // previous iteration.
            data = [0u8; 16];
            let ptr = data.as_mut_ptr();
            let len = data.len();
            // `ptr` and `len` describe the whole of `data`, so the buffer is
            // writable for `len` bytes as `encode_utf8` requires.
            let written = unsafe { encode_utf8(ch, ptr, len).ok().unwrap() };
            assert_eq!(written, ch.len_utf8());

            // The bytes must agree exactly with the standard library encoder.
            let mut expected = [0u8; 4];
            let expected_len = ch.encode_utf8(&mut expected).len();
            assert_eq!(&data[..written], &expected[..expected_len]);

            // And the buffer must decode back to the same char.
            let string = std::str::from_utf8(&data).unwrap();
            assert_eq!(string.chars().next(), Some(ch));
        }
    }
}
77
#[test]
fn test_encode_utf8_oob() {
    // test that we report oob if the buffer is too short
    let mut data = [0u8; 16];
    // One sample char per encoded length (1, 2, 3 and 4 bytes). The third one
    // is U+FFFD REPLACEMENT CHARACTER, written as an escape so it is not
    // mistaken for an encoding glitch in this file.
    let chars = ['a', 'α', '\u{FFFD}', '𐍈'];
    for (len, &ch) in (1..=4).zip(&chars) {
        assert_eq!(len, ch.len_utf8(), "Len of ch={}", ch);
        let ptr = data.as_mut_ptr();
        // `data` holds 16 bytes and `len` is at most 4, so the buffer is
        // writable for every length passed below.
        unsafe {
            // One byte too few must be rejected...
            assert!(encode_utf8(ch, ptr, len - 1).is_err());
            // ...while an exactly fitting buffer must succeed.
            assert_eq!(encode_utf8(ch, ptr, len).ok(), Some(len));
        }
    }
}
92
93