mem.rs source code [crates/encoding_rs/src/mem.rs]

1	// Copyright Mozilla Foundation. See the COPYRIGHT
2	// file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	//! Functions for converting between different in-RAM representations of text
11	//! and for quickly checking if the Unicode Bidirectional Algorithm can be
12	//! avoided.
13	//!
14	//! By using slices for output, the functions here seek to enable by-register
15	//! (ALU register or SIMD register as available) operations in order to
16	//! outperform iterator-based conversions available in the Rust standard
17	//! library.
18	//!
19	//! _Note:_ "Latin1" in this module refers to the Unicode range from U+0000 to
20	//! U+00FF, inclusive, and does not refer to the windows-1252 range. This
21	//! in-memory encoding is sometimes used as a storage optimization of text
22	//! when UTF-16 indexing and length semantics are exposed.
23	//!
24	//! The FFI bindings for this module are in the
25	//! [encoding_c_mem crate](https://github.com/hsivonen/encoding_c_mem).
26
27	#[cfg(feature = "alloc")]
28	use alloc::borrow::Cow;
29	#[cfg(feature = "alloc")]
30	use alloc::string::String;
31	#[cfg(feature = "alloc")]
32	use alloc::vec::Vec;
33
34	use super::in_inclusive_range16;
35	use super::in_inclusive_range32;
36	use super::in_inclusive_range8;
37	use super::in_range16;
38	use super::in_range32;
39	use super::DecoderResult;
40	use crate::ascii::*;
41	use crate::utf_8::*;
42
/// Like `debug_assert!`, except that the assertion is skipped when the crate
/// is compiled with `--cfg fuzzing`.
///
/// All tokens are forwarded verbatim to `debug_assert!`, so the optional
/// format message and arguments are supported.
macro_rules! non_fuzz_debug_assert {
    ($($arg:tt)*) => (if !cfg!(fuzzing) { debug_assert!($($arg)*); })
}
46
47	cfg_if! {
48	if #[cfg(feature = "simd-accel")] {
49	use ::core::intrinsics::likely;
50	use ::core::intrinsics::unlikely;
51	} else {
52	#[inline(always)]
53	fn likely(b: bool) -> bool {
54	b
55	}
56	#[inline(always)]
57	fn unlikely(b: bool) -> bool {
58	b
59	}
60	}
61	}
62
/// Classification of text as Latin1 (all code points are below U+0100),
/// left-to-right with some non-Latin1 characters or as containing at least
/// some right-to-left characters.
///
/// The enum is `#[repr(C)]` with explicit discriminants.
#[must_use]
#[derive(Debug, PartialEq, Eq)]
#[repr(C)]
pub enum Latin1Bidi {
    /// Every character is below U+0100.
    Latin1 = 0,
    /// There is at least one character that's U+0100 or higher, but there
    /// are no right-to-left characters.
    LeftToRight = 1,
    /// There is at least one right-to-left character.
    Bidi = 2,
}
78
// Word-sized mask with the high byte of every 16-bit lane set: a word of
// UTF-16 code units ANDed with it is zero iff every unit is below U+0100.
// `as` truncates, so works on 32-bit, too.
#[allow(dead_code)]
const LATIN1_MASK: usize = 0xFF00_FF00_FF00_FF00u64 as usize;
82
/// Generates `fn $name(buffer: &[$unit]) -> bool` that returns `true` iff
/// every unit of `buffer` is below `$bound`, reading one `usize` at a time
/// once the read position is aligned.
///
/// `$mask` names a `usize` constant whose set bits are exactly the bits that
/// must be zero in every `$unit` lane of a word. The expansion refers to
/// `ALU_ALIGNMENT` and `ALU_ALIGNMENT_MASK`, which must be in scope at the
/// invocation site.
#[allow(unused_macros)]
macro_rules! by_unit_check_alu {
    ($name:ident, $unit:ty, $bound:expr, $mask:ident) => {
        #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            let mut accu = 0usize;
            let unit_size = ::core::mem::size_of::<$unit>();
            let len = buffer.len();
            if len >= ALU_ALIGNMENT / unit_size {
                // The most common reason to return `false` is for the first code
                // unit to fail the test, so check that first.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Number of units to consume one by one before `src.add(offset)`
                // is aligned for word-sized reads.
                let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK))
                    & ALU_ALIGNMENT_MASK)
                    / unit_size;
                if until_alignment + ALU_ALIGNMENT / unit_size <= len {
                    if until_alignment != 0 {
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        if accu >= $bound {
                            return false;
                        }
                    }
                    let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
                    if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
                        // Safety: the above check lets us perform 4 consecutive reads of
                        // length ALU_ALIGNMENT / unit_size units. ALU_ALIGNMENT is the size
                        // of usize, and unit_size is the size of the element type `src`
                        // points to, so this is equal to performing four usize reads.
                        //
                        // This invariant is upheld on all loop iterations
                        let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
                        loop {
                            let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
                                | unsafe {
                                    *(src.add(offset + (ALU_ALIGNMENT / unit_size)) as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                };
                            if unroll_accu & $mask != 0 {
                                return false;
                            }
                            offset += 4 * (ALU_ALIGNMENT / unit_size);
                            // Safety: this check lets us continue to perform the 4 reads earlier
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    while offset <= len_minus_stride {
                        // Safety: the above check lets us perform one usize read.
                        accu |= unsafe { *(src.add(offset) as *const usize) };
                        offset += ALU_ALIGNMENT / unit_size;
                    }
                }
            }
            // Tail (or the whole buffer when it was too short for word reads).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu & $mask == 0
        }
    };
}
162
/// Generates `fn $name(buffer: &[$unit]) -> bool` that returns `true` iff
/// every unit of `buffer` is below `$bound`, reading one `$simd_ty` vector at
/// a time once the read position is aligned.
///
/// `$splat` is the all-zero starting value of the vector accumulator and
/// `$func` is a predicate that returns `true` when an ORed-together vector
/// passes the check. The expansion refers to `SIMD_STRIDE_SIZE`,
/// `SIMD_ALIGNMENT` and `SIMD_ALIGNMENT_MASK`, which must be in scope at the
/// invocation site.
#[allow(unused_macros)]
macro_rules! by_unit_check_simd {
    ($name:ident, $unit:ty, $splat:expr, $simd_ty:ty, $bound:expr, $func:ident) => {
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            let mut accu = 0usize;
            let unit_size = ::core::mem::size_of::<$unit>();
            let len = buffer.len();
            if len >= SIMD_STRIDE_SIZE / unit_size {
                // The most common reason to return `false` is for the first code
                // unit to fail the test, so check that first.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Number of units to consume one by one before `src.add(offset)`
                // is aligned for vector reads.
                let mut until_alignment = ((SIMD_ALIGNMENT
                    - ((src as usize) & SIMD_ALIGNMENT_MASK))
                    & SIMD_ALIGNMENT_MASK)
                    / unit_size;
                if until_alignment + SIMD_STRIDE_SIZE / unit_size <= len {
                    if until_alignment != 0 {
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        if accu >= $bound {
                            return false;
                        }
                    }
                    let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
                    if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
                        // Safety: the above check lets us perform 4 consecutive reads of
                        // length SIMD_STRIDE_SIZE / unit_size units. SIMD_STRIDE_SIZE is the
                        // size of $simd_ty, and unit_size is the size of the element type
                        // `src` points to, so this is equal to performing four $simd_ty reads.
                        //
                        // This invariant is upheld on all loop iterations
                        let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
                        loop {
                            let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
                                | unsafe {
                                    *(src.add(offset + (SIMD_STRIDE_SIZE / unit_size))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                };
                            if !$func(unroll_accu) {
                                return false;
                            }
                            offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
                            // Safety: this check lets us continue to perform the 4 reads earlier
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    let mut simd_accu = $splat;
                    while offset <= len_minus_stride {
                        // Safety: the above check lets us perform one $simd_ty read.
                        simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
                        offset += SIMD_STRIDE_SIZE / unit_size;
                    }
                    if !$func(simd_accu) {
                        return false;
                    }
                }
            }
            // Tail (or the whole buffer when it was too short for vector reads).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu < $bound
        }
    };
}
247
248	cfg_if! {
249	if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
250	use crate::simd_funcs::*;
251	use core::simd::u8x16;
252	use core::simd::u16x8;
253
254	const SIMD_ALIGNMENT: usize = `16`;
255
256	const SIMD_ALIGNMENT_MASK: usize = `15`;
257
258	by_unit_check_simd!(is_ascii_impl, u8, u8x16::splat(`0`), u8x16, `0x80`, simd_is_ascii);
259	by_unit_check_simd!(is_basic_latin_impl, u16, u16x8::splat(`0`), u16x8, `0x80`, simd_is_basic_latin);
260	by_unit_check_simd!(is_utf16_latin1_impl, u16, u16x8::splat(`0`), u16x8, `0x100`, simd_is_latin1);
261
262	#[inline(always)]
263	fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
264	// This function is a mess, because it simultaneously tries to do
265	// only aligned SIMD (perhaps misguidedly) and needs to deal with
266	// the last code unit in a SIMD stride being part of a valid
267	// surrogate pair.
268	let unit_size = ::core::mem::size_of::<u16>();
269	let src = buffer.as_ptr();
270	let len = buffer.len();
271	let mut offset = `0usize`;
272	'outer: loop {
273	let until_alignment = ((SIMD_ALIGNMENT - ((unsafe { src.add(offset) } as usize) & SIMD_ALIGNMENT_MASK)) &
274	SIMD_ALIGNMENT_MASK) / unit_size;
275	if until_alignment == `0` {
276	if offset + SIMD_STRIDE_SIZE / unit_size > len {
277	break;
278	}
279	} else {
280	let offset_plus_until_alignment = offset + until_alignment;
281	let offset_plus_until_alignment_plus_one = offset_plus_until_alignment + `1`;
282	if offset_plus_until_alignment_plus_one + SIMD_STRIDE_SIZE / unit_size > len {
283	break;
284	}
285	let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_until_alignment_plus_one]);
286	if up_to < until_alignment {
287	return offset + up_to;
288	}
289	if last_valid_low {
290	offset = offset_plus_until_alignment_plus_one;
291	continue;
292	}
293	offset = offset_plus_until_alignment;
294	}
295	let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
296	loop {
297	let offset_plus_stride = offset + SIMD_STRIDE_SIZE / unit_size;
298	if contains_surrogates(unsafe { (src.add(offset) as const u16x8) }) {
299	if offset_plus_stride == len {
300	break 'outer;
301	}
302	let offset_plus_stride_plus_one = offset_plus_stride + `1`;
303	let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_stride_plus_one]);
304	if up_to < SIMD_STRIDE_SIZE / unit_size {
305	return offset + up_to;
306	}
307	if last_valid_low {
308	offset = offset_plus_stride_plus_one;
309	continue 'outer;
310	}
311	}
312	offset = offset_plus_stride;
313	if offset > len_minus_stride {
314	break 'outer;
315	}
316	}
317	}
318	let (up_to, _) = utf16_valid_up_to_alu(&buffer[offset..]);
319	offset + up_to
320	}
321	} else {
322	by_unit_check_alu!(is_ascii_impl, u8, `0x80`, ASCII_MASK);
323	by_unit_check_alu!(is_basic_latin_impl, u16, `0x80`, BASIC_LATIN_MASK);
324	by_unit_check_alu!(is_utf16_latin1_impl, u16, `0x100`, LATIN1_MASK);
325
326	#[inline(always)]
327	fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
328	let (up_to, _) = utf16_valid_up_to_alu(buffer);
329	up_to
330	}
331	}
332	}
333
/// Scans `buffer` one code unit at a time and returns how many leading code
/// units are free of unpaired surrogates.
///
/// The first return value is the index of the first unpaired surrogate, or
/// `buffer.len()` if there is none.
///
/// The second return value is true iff the last code unit of the slice was
/// reached and turned out to be a low surrogate that is part of a valid pair.
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
#[inline(always)]
fn utf16_valid_up_to_alu(buffer: &[u16]) -> (usize, bool) {
    let len = buffer.len();
    if len == 0 {
        return (0, false);
    }
    let mut offset = 0usize;
    loop {
        let unit = buffer[offset];
        let next = offset + 1;
        // Wrapping subtraction maps the surrogate range to 0..=0x7FF, so a
        // single unsigned comparison classifies the unit.
        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
            // Not a surrogate
            offset = next;
            if offset == len {
                return (offset, false);
            }
            continue;
        }
        if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
            // high surrogate
            if next < len {
                let second = buffer[next];
                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                    // The next code unit is a low surrogate. Advance position.
                    offset = next + 1;
                    if offset == len {
                        return (offset, true);
                    }
                    continue;
                }
                // The next code unit is not a low surrogate. Don't advance
                // position and treat the high surrogate as unpaired.
                // fall through
            }
            // Unpaired, fall through
        }
        // Unpaired surrogate
        return (offset, false);
    }
}
379
380	cfg_if! {
381	if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
382	#[inline(always)]
383	fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
384	let mut offset = `0usize`;
385	let bytes = buffer.as_bytes();
386	let len = bytes.len();
387	if len >= SIMD_STRIDE_SIZE {
388	let src = bytes.as_ptr();
389	let mut until_alignment = (SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
390	SIMD_ALIGNMENT_MASK;
391	if until_alignment + SIMD_STRIDE_SIZE <= len {
392	while until_alignment != `0` {
393	if bytes[offset] > `0xC3` {
394	return Some(offset);
395	}
396	offset += `1`;
397	until_alignment -= `1`;
398	}
399	let len_minus_stride = len - SIMD_STRIDE_SIZE;
400	loop {
401	if !simd_is_str_latin1(unsafe { (src.add(offset) as const u8x16) }) {
402	// TODO: Ensure this compiles away when inlined into `is_str_latin1()`.
403	while bytes[offset] & `0xC0` == `0x80` {
404	offset += `1`;
405	}
406	return Some(offset);
407	}
408	offset += SIMD_STRIDE_SIZE;
409	if offset > len_minus_stride {
410	break;
411	}
412	}
413	}
414	}
415	for i in offset..len {
416	if bytes[i] > `0xC3` {
417	return Some(i);
418	}
419	}
420	None
421	}
422	} else {
423	#[inline(always)]
424	fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
425	let mut bytes = buffer.as_bytes();
426	let mut total = `0`;
427	loop {
428	if let Some((byte, offset)) = validate_ascii(bytes) {
429	total += offset;
430	if byte > `0xC3` {
431	return Some(total);
432	}
433	bytes = &bytes[offset + `2`..];
434	total += `2`;
435	} else {
436	return None;
437	}
438	}
439	}
440	}
441	}
442
443	#[inline(always)]
444	fn is_utf8_latin1_impl(buffer: &[u8]) -> Option<usize> {
445	let mut bytes: &[u8] = buffer;
446	let mut total: usize = `0`;
447	loop {
448	if let Some((byte: u8, offset: usize)) = validate_ascii(slice:bytes) {
449	total += offset;
450	if in_inclusive_range8(i:byte, start:`0xC2`, end:`0xC3`) {
451	let next: usize = offset + `1`;
452	if next == bytes.len() {
453	return Some(total);
454	}
455	if bytes[next] & `0xC0` != `0x80` {
456	return Some(total);
457	}
458	bytes = &bytes[offset + `2`..];
459	total += `2`;
460	} else {
461	return Some(total);
462	}
463	} else {
464	return None;
465	}
466	}
467	}
468
469	cfg_if! {
470	if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
471	#[inline(always)]
472	fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
473	let mut offset = `0usize`;
474	let len = buffer.len();
475	if len >= SIMD_STRIDE_SIZE / `2` {
476	let src = buffer.as_ptr();
477	let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
478	SIMD_ALIGNMENT_MASK) / `2`;
479	if until_alignment + (SIMD_STRIDE_SIZE / `2`) <= len {
480	while until_alignment != `0` {
481	if is_utf16_code_unit_bidi(buffer[offset]) {
482	return `true`;
483	}
484	offset += `1`;
485	until_alignment -= `1`;
486	}
487	let len_minus_stride = len - (SIMD_STRIDE_SIZE / `2`);
488	loop {
489	if is_u16x8_bidi(unsafe { (src.add(offset) as const u16x8) }) {
490	return `true`;
491	}
492	offset += SIMD_STRIDE_SIZE / `2`;
493	if offset > len_minus_stride {
494	break;
495	}
496	}
497	}
498	}
499	for &u in &buffer[offset..] {
500	if is_utf16_code_unit_bidi(u) {
501	return `true`;
502	}
503	}
504	`false`
505	}
506	} else {
507	#[inline(always)]
508	fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
509	for &u in buffer {
510	if is_utf16_code_unit_bidi(u) {
511	return `true`;
512	}
513	}
514	`false`
515	}
516	}
517	}
518
519	cfg_if! {
520	if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
521	#[inline(always)]
522	fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
523	let mut offset = `0usize`;
524	let len = buffer.len();
525	if len >= SIMD_STRIDE_SIZE / `2` {
526	let src = buffer.as_ptr();
527	let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
528	SIMD_ALIGNMENT_MASK) / `2`;
529	if until_alignment + (SIMD_STRIDE_SIZE / `2`) <= len {
530	while until_alignment != `0` {
531	if buffer[offset] > `0xFF` {
532	// This transition isn't optimal, since the aligment is recomputing
533	// but not tweaking further today.
534	if is_utf16_bidi_impl(&buffer[offset..]) {
535	return Latin1Bidi::Bidi;
536	}
537	return Latin1Bidi::LeftToRight;
538	}
539	offset += `1`;
540	until_alignment -= `1`;
541	}
542	let len_minus_stride = len - (SIMD_STRIDE_SIZE / `2`);
543	loop {
544	let mut s = unsafe { (src.add(offset) as const u16x8) };
545	if !simd_is_latin1(s) {
546	loop {
547	if is_u16x8_bidi(s) {
548	return Latin1Bidi::Bidi;
549	}
550	offset += SIMD_STRIDE_SIZE / `2`;
551	if offset > len_minus_stride {
552	for &u in &buffer[offset..] {
553	if is_utf16_code_unit_bidi(u) {
554	return Latin1Bidi::Bidi;
555	}
556	}
557	return Latin1Bidi::LeftToRight;
558	}
559	s = unsafe { (src.add(offset) as const u16x8) };
560	}
561	}
562	offset += SIMD_STRIDE_SIZE / `2`;
563	if offset > len_minus_stride {
564	break;
565	}
566	}
567	}
568	}
569	let mut iter = (&buffer[offset..]).iter();
570	loop {
571	if let Some(&u) = iter.next() {
572	if u > `0xFF` {
573	let mut inner_u = u;
574	loop {
575	if is_utf16_code_unit_bidi(inner_u) {
576	return Latin1Bidi::Bidi;
577	}
578	if let Some(&code_unit) = iter.next() {
579	inner_u = code_unit;
580	} else {
581	return Latin1Bidi::LeftToRight;
582	}
583	}
584	}
585	} else {
586	return Latin1Bidi::Latin1;
587	}
588	}
589	}
590	} else {
591	#[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
592	#[inline(always)]
593	fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
594	let mut offset = `0usize`;
595	let len = buffer.len();
596	if len >= ALU_ALIGNMENT / `2` {
597	let src = buffer.as_ptr();
598	let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) &
599	ALU_ALIGNMENT_MASK) / `2`;
600	if until_alignment + ALU_ALIGNMENT / `2` <= len {
601	while until_alignment != `0` {
602	if buffer[offset] > `0xFF` {
603	if is_utf16_bidi_impl(&buffer[offset..]) {
604	return Latin1Bidi::Bidi;
605	}
606	return Latin1Bidi::LeftToRight;
607	}
608	offset += `1`;
609	until_alignment -= `1`;
610	}
611	let len_minus_stride = len - ALU_ALIGNMENT / `2`;
612	loop {
613	if unsafe { (src.add(offset) as const usize) } & LATIN1_MASK != `0` {
614	if is_utf16_bidi_impl(&buffer[offset..]) {
615	return Latin1Bidi::Bidi;
616	}
617	return Latin1Bidi::LeftToRight;
618	}
619	offset += ALU_ALIGNMENT / `2`;
620	if offset > len_minus_stride {
621	break;
622	}
623	}
624	}
625	}
626	let mut iter = (&buffer[offset..]).iter();
627	loop {
628	if let Some(&u) = iter.next() {
629	if u > `0xFF` {
630	let mut inner_u = u;
631	loop {
632	if is_utf16_code_unit_bidi(inner_u) {
633	return Latin1Bidi::Bidi;
634	}
635	if let Some(&code_unit) = iter.next() {
636	inner_u = code_unit;
637	} else {
638	return Latin1Bidi::LeftToRight;
639	}
640	}
641	}
642	} else {
643	return Latin1Bidi::Latin1;
644	}
645	}
646	}
647	}
648	}
649
650	/// Checks whether the buffer is all-ASCII.
651	///
652	/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
653	/// is not guaranteed to fail fast.)
654	pub fn is_ascii(buffer: &[u8]) -> bool {
655	is_ascii_impl(buffer)
656	}
657
658	/// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing
659	/// only ASCII characters).
660	///
661	/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
662	/// is not guaranteed to fail fast.)
663	pub fn is_basic_latin(buffer: &[u16]) -> bool {
664	is_basic_latin_impl(buffer)
665	}
666
667	/// Checks whether the buffer is valid UTF-8 representing only code points
668	/// less than or equal to U+00FF.
669	///
670	/// Fails fast. (I.e. returns before having read the whole buffer if UTF-8
671	/// invalidity or code points above U+00FF are discovered.
672	pub fn is_utf8_latin1(buffer: &[u8]) -> bool {
673	is_utf8_latin1_impl(buffer).is_none()
674	}
675
676	/// Checks whether the buffer represents only code points less than or equal
677	/// to U+00FF.
678	///
679	/// Fails fast. (I.e. returns before having read the whole buffer if code
680	/// points above U+00FF are discovered.
681	pub fn is_str_latin1(buffer: &str) -> bool {
682	is_str_latin1_impl(buffer).is_none()
683	}
684
685	/// Checks whether the buffer represents only code point less than or equal
686	/// to U+00FF.
687	///
688	/// May read the entire buffer even if it isn't all-Latin1. (I.e. the function
689	/// is not guaranteed to fail fast.)
690	pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
691	is_utf16_latin1_impl(buffer)
692	}
693
694	/// Checks whether a potentially-invalid UTF-8 buffer contains code points
695	/// that trigger right-to-left processing.
696	///
697	/// The check is done on a Unicode block basis without regard to assigned
698	/// vs. unassigned code points in the block. Hebrew presentation forms in
699	/// the Alphabetic Presentation Forms block are treated as if they formed
700	/// a block on their own (i.e. it treated as right-to-left). Additionally,
701	/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
702	/// for. Control characters that are technically bidi controls but do not
703	/// cause right-to-left behavior without the presence of right-to-left
704	/// characters or right-to-left controls are not checked for. As a special
705	/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
706	///
707	/// Returns `true` if the input is invalid UTF-8 or the input contains an
708	/// RTL character. Returns `false` if the input is valid UTF-8 and contains
709	/// no RTL characters.
710	#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if, cyclomatic_complexity))]
711	#[inline]
712	pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
713	// As of rustc 1.25.0-nightly (73ac5d6a8 2018-01-11), this is faster
714	// than UTF-8 validation followed by `is_str_bidi()` for German,
715	// Russian and Japanese. However, this is considerably slower for Thai.
716	// Chances are that the compiler makes some branch predictions that are
717	// unfortunate for Thai. Not spending the time to manually optimize
718	// further at this time, since it's unclear if this variant even has
719	// use cases. However, this is worth revisiting once Rust gets the
720	// ability to annotate relative priorities of match arms.
721
722	// U+058F: D6 8F
723	// U+0590: D6 90
724	// U+08FF: E0 A3 BF
725	// U+0900: E0 A4 80
726	//
727	// U+200F: E2 80 8F
728	// U+202B: E2 80 AB
729	// U+202E: E2 80 AE
730	// U+2067: E2 81 A7
731	//
732	// U+FB1C: EF AC 9C
733	// U+FB1D: EF AC 9D
734	// U+FDFF: EF B7 BF
735	// U+FE00: EF B8 80
736	//
737	// U+FE6F: EF B9 AF
738	// U+FE70: EF B9 B0
739	// U+FEFE: EF BB BE
740	// U+FEFF: EF BB BF
741	//
742	// U+107FF: F0 90 9F BF
743	// U+10800: F0 90 A0 80
744	// U+10FFF: F0 90 BF BF
745	// U+11000: F0 91 80 80
746	//
747	// U+1E7FF: F0 9E 9F BF
748	// U+1E800: F0 9E A0 80
749	// U+1EFFF: F0 9E BF BF
750	// U+1F000: F0 9F 80 80
751	let mut src = buffer;
752	'outer: loop {
753	if let Some((mut byte, mut read)) = validate_ascii(src) {
754	// Check for the longest sequence to avoid checking twice for the
755	// multi-byte sequences.
756	if read + `4` <= src.len() {
757	'inner: loop {
758	// At this point, `byte` is not included in `read`.
759	match byte {
760	`0`..=`0x7F` => {
761	// ASCII: go back to SIMD.
762	read += `1`;
763	src = &src[read..];
764	continue 'outer;
765	}
766	`0xC2`..=`0xD5` => {
767	// Two-byte
768	let second = unsafe { *(src.get_unchecked(read + `1`)) };
769	if !in_inclusive_range8(second, `0x80`, `0xBF`) {
770	return `true`;
771	}
772	read += `2`;
773	}
774	`0xD6` => {
775	// Two-byte
776	let second = unsafe { *(src.get_unchecked(read + `1`)) };
777	if !in_inclusive_range8(second, `0x80`, `0xBF`) {
778	return `true`;
779	}
780	// XXX consider folding the above and below checks
781	if second > `0x8F` {
782	return `true`;
783	}
784	read += `2`;
785	}
786	// two-byte starting with 0xD7 and above is bidi
787	`0xE1` \| `0xE3`..=`0xEC` \| `0xEE` => {
788	// Three-byte normal
789	let second = unsafe { *(src.get_unchecked(read + `1`)) };
790	let third = unsafe { *(src.get_unchecked(read + `2`)) };
791	if ((UTF8_DATA.table[usize::from(second)]
792	& unsafe {
793	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
794	})
795	\| (third >> `6`))
796	!= `2`
797	{
798	return `true`;
799	}
800	read += `3`;
801	}
802	`0xE2` => {
803	// Three-byte normal, potentially bidi
804	let second = unsafe { *(src.get_unchecked(read + `1`)) };
805	let third = unsafe { *(src.get_unchecked(read + `2`)) };
806	if ((UTF8_DATA.table[usize::from(second)]
807	& unsafe {
808	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
809	})
810	\| (third >> `6`))
811	!= `2`
812	{
813	return `true`;
814	}
815	if second == `0x80` {
816	if third == `0x8F` \|\| third == `0xAB` \|\| third == `0xAE` {
817	return `true`;
818	}
819	} else if second == `0x81` {
820	if third == `0xA7` {
821	return `true`;
822	}
823	}
824	read += `3`;
825	}
826	`0xEF` => {
827	// Three-byte normal, potentially bidi
828	let second = unsafe { *(src.get_unchecked(read + `1`)) };
829	let third = unsafe { *(src.get_unchecked(read + `2`)) };
830	if ((UTF8_DATA.table[usize::from(second)]
831	& unsafe {
832	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
833	})
834	\| (third >> `6`))
835	!= `2`
836	{
837	return `true`;
838	}
839	if in_inclusive_range8(second, `0xAC`, `0xB7`) {
840	if second == `0xAC` {
841	if third > `0x9C` {
842	return `true`;
843	}
844	} else {
845	return `true`;
846	}
847	} else if in_inclusive_range8(second, `0xB9`, `0xBB`) {
848	if second == `0xB9` {
849	if third > `0xAF` {
850	return `true`;
851	}
852	} else if second == `0xBB` {
853	if third != `0xBF` {
854	return `true`;
855	}
856	} else {
857	return `true`;
858	}
859	}
860	read += `3`;
861	}
862	`0xE0` => {
863	// Three-byte special lower bound, potentially bidi
864	let second = unsafe { *(src.get_unchecked(read + `1`)) };
865	let third = unsafe { *(src.get_unchecked(read + `2`)) };
866	if ((UTF8_DATA.table[usize::from(second)]
867	& unsafe {
868	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
869	})
870	\| (third >> `6`))
871	!= `2`
872	{
873	return `true`;
874	}
875	// XXX can this be folded into the above validity check
876	if second < `0xA4` {
877	return `true`;
878	}
879	read += `3`;
880	}
881	`0xED` => {
882	// Three-byte special upper bound
883	let second = unsafe { *(src.get_unchecked(read + `1`)) };
884	let third = unsafe { *(src.get_unchecked(read + `2`)) };
885	if ((UTF8_DATA.table[usize::from(second)]
886	& unsafe {
887	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
888	})
889	\| (third >> `6`))
890	!= `2`
891	{
892	return `true`;
893	}
894	read += `3`;
895	}
896	`0xF1`..=`0xF4` => {
897	// Four-byte normal
898	let second = unsafe { *(src.get_unchecked(read + `1`)) };
899	let third = unsafe { *(src.get_unchecked(read + `2`)) };
900	let fourth = unsafe { *(src.get_unchecked(read + `3`)) };
901	if (u16::from(
902	UTF8_DATA.table[usize::from(second)]
903	& unsafe {
904	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
905	},
906	) \| u16::from(third >> `6`)
907	\| (u16::from(fourth & `0xC0`) << `2`))
908	!= `0x202`
909	{
910	return `true`;
911	}
912	read += `4`;
913	}
914	`0xF0` => {
915	// Four-byte special lower bound, potentially bidi
916	let second = unsafe { *(src.get_unchecked(read + `1`)) };
917	let third = unsafe { *(src.get_unchecked(read + `2`)) };
918	let fourth = unsafe { *(src.get_unchecked(read + `3`)) };
919	if (u16::from(
920	UTF8_DATA.table[usize::from(second)]
921	& unsafe {
922	(UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`))
923	},
924	) \| u16::from(third >> `6`)
925	\| (u16::from(fourth & `0xC0`) << `2`))
926	!= `0x202`
927	{
928	return `true`;
929	}
930	if unlikely(second == `0x90` \|\| second == `0x9E`) {
931	let third = src[read + `2`];
932	if third >= `0xA0` {
933	return `true`;
934	}
935	}
936	read += `4`;
937	}
938	_ => {
939	// Invalid lead or bidi-only lead
940	return `true`;
941	}
942	}
943	if read + `4` > src.len() {
944	if read == src.len() {
945	return `false`;
946	}
947	byte = src[read];
948	break 'inner;
949	}
950	byte = src[read];
951	continue 'inner;
952	}
953	}
954	// We can't have a complete 4-byte sequence, but we could still have
955	// a complete shorter sequence.
956
957	// At this point, `byte` is not included in `read`.
958	match byte {
959	`0`..=`0x7F` => {
960	// ASCII: go back to SIMD.
961	read += `1`;
962	src = &src[read..];
963	continue 'outer;
964	}
965	`0xC2`..=`0xD5` => {
966	// Two-byte
967	let new_read = read + `2`;
968	if new_read > src.len() {
969	return `true`;
970	}
971	let second = unsafe { *(src.get_unchecked(read + `1`)) };
972	if !in_inclusive_range8(second, `0x80`, `0xBF`) {
973	return `true`;
974	}
975	read = new_read;
976	// We need to deal with the case where we came here with 3 bytes
977	// left, so we need to take a look at the last one.
978	src = &src[read..];
979	continue 'outer;
980	}
981	`0xD6` => {
982	// Two-byte, potentially bidi
983	let new_read = read + `2`;
984	if new_read > src.len() {
985	return `true`;
986	}
987	let second = unsafe { *(src.get_unchecked(read + `1`)) };
988	if !in_inclusive_range8(second, `0x80`, `0xBF`) {
989	return `true`;
990	}
991	// XXX consider folding the above and below checks
992	if second > `0x8F` {
993	return `true`;
994	}
995	read = new_read;
996	// We need to deal with the case where we came here with 3 bytes
997	// left, so we need to take a look at the last one.
998	src = &src[read..];
999	continue 'outer;
1000	}
1001	// two-byte starting with 0xD7 and above is bidi
1002	`0xE1` \| `0xE3`..=`0xEC` \| `0xEE` => {
1003	// Three-byte normal
1004	let new_read = read + `3`;
1005	if new_read > src.len() {
1006	return `true`;
1007	}
1008	let second = unsafe { *(src.get_unchecked(read + `1`)) };
1009	let third = unsafe { *(src.get_unchecked(read + `2`)) };
1010	if ((UTF8_DATA.table[usize::from(second)]
1011	& unsafe { (UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`)) })
1012	\| (third >> `6`))
1013	!= `2`
1014	{
1015	return `true`;
1016	}
1017	}
1018	`0xE2` => {
1019	// Three-byte normal, potentially bidi
1020	let new_read = read + `3`;
1021	if new_read > src.len() {
1022	return `true`;
1023	}
1024	let second = unsafe { *(src.get_unchecked(read + `1`)) };
1025	let third = unsafe { *(src.get_unchecked(read + `2`)) };
1026	if ((UTF8_DATA.table[usize::from(second)]
1027	& unsafe { (UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`)) })
1028	\| (third >> `6`))
1029	!= `2`
1030	{
1031	return `true`;
1032	}
1033	if second == `0x80` {
1034	if third == `0x8F` \|\| third == `0xAB` \|\| third == `0xAE` {
1035	return `true`;
1036	}
1037	} else if second == `0x81` {
1038	if third == `0xA7` {
1039	return `true`;
1040	}
1041	}
1042	}
1043	`0xEF` => {
1044	// Three-byte normal, potentially bidi
1045	let new_read = read + `3`;
1046	if new_read > src.len() {
1047	return `true`;
1048	}
1049	let second = unsafe { *(src.get_unchecked(read + `1`)) };
1050	let third = unsafe { *(src.get_unchecked(read + `2`)) };
1051	if ((UTF8_DATA.table[usize::from(second)]
1052	& unsafe { (UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`)) })
1053	\| (third >> `6`))
1054	!= `2`
1055	{
1056	return `true`;
1057	}
1058	if in_inclusive_range8(second, `0xAC`, `0xB7`) {
1059	if second == `0xAC` {
1060	if third > `0x9C` {
1061	return `true`;
1062	}
1063	} else {
1064	return `true`;
1065	}
1066	} else if in_inclusive_range8(second, `0xB9`, `0xBB`) {
1067	if second == `0xB9` {
1068	if third > `0xAF` {
1069	return `true`;
1070	}
1071	} else if second == `0xBB` {
1072	if third != `0xBF` {
1073	return `true`;
1074	}
1075	} else {
1076	return `true`;
1077	}
1078	}
1079	}
1080	`0xE0` => {
1081	// Three-byte special lower bound, potentially bidi
1082	let new_read = read + `3`;
1083	if new_read > src.len() {
1084	return `true`;
1085	}
1086	let second = unsafe { *(src.get_unchecked(read + `1`)) };
1087	let third = unsafe { *(src.get_unchecked(read + `2`)) };
1088	if ((UTF8_DATA.table[usize::from(second)]
1089	& unsafe { (UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`)) })
1090	\| (third >> `6`))
1091	!= `2`
1092	{
1093	return `true`;
1094	}
1095	// XXX can this be folded into the above validity check
1096	if second < `0xA4` {
1097	return `true`;
1098	}
1099	}
1100	`0xED` => {
1101	// Three-byte special upper bound
1102	let new_read = read + `3`;
1103	if new_read > src.len() {
1104	return `true`;
1105	}
1106	let second = unsafe { *(src.get_unchecked(read + `1`)) };
1107	let third = unsafe { *(src.get_unchecked(read + `2`)) };
1108	if ((UTF8_DATA.table[usize::from(second)]
1109	& unsafe { (UTF8_DATA.table.get_unchecked(byte as usize* + `0x80`)) })
1110	\| (third >> `6`))
1111	!= `2`
1112	{
1113	return `true`;
1114	}
1115	}
1116	_ => {
1117	// Invalid lead, 4-byte lead or 2-byte bidi-only lead
1118	return `true`;
1119	}
1120	}
1121	return `false`;
1122	} else {
1123	return `false`;
1124	}
1125	}
1126	}
1127
1128	/// Checks whether a valid UTF-8 buffer contains code points that trigger
1129	/// right-to-left processing.
1130	///
1131	/// The check is done on a Unicode block basis without regard to assigned
1132	/// vs. unassigned code points in the block. Hebrew presentation forms in
1133	/// the Alphabetic Presentation Forms block are treated as if they formed
1134	/// a block on their own (i.e. it treated as right-to-left). Additionally,
1135	/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
1136	/// for. Control characters that are technically bidi controls but do not
1137	/// cause right-to-left behavior without the presence of right-to-left
1138	/// characters or right-to-left controls are not checked for. As a special
1139	/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
1140	#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1141	#[inline]
1142	pub fn is_str_bidi(buffer: &str) -> bool {
1143	// U+058F: D6 8F
1144	// U+0590: D6 90
1145	// U+08FF: E0 A3 BF
1146	// U+0900: E0 A4 80
1147	//
1148	// U+200F: E2 80 8F
1149	// U+202B: E2 80 AB
1150	// U+202E: E2 80 AE
1151	// U+2067: E2 81 A7
1152	//
1153	// U+FB1C: EF AC 9C
1154	// U+FB1D: EF AC 9D
1155	// U+FDFF: EF B7 BF
1156	// U+FE00: EF B8 80
1157	//
1158	// U+FE6F: EF B9 AF
1159	// U+FE70: EF B9 B0
1160	// U+FEFE: EF BB BE
1161	// U+FEFF: EF BB BF
1162	//
1163	// U+107FF: F0 90 9F BF
1164	// U+10800: F0 90 A0 80
1165	// U+10FFF: F0 90 BF BF
1166	// U+11000: F0 91 80 80
1167	//
1168	// U+1E7FF: F0 9E 9F BF
1169	// U+1E800: F0 9E A0 80
1170	// U+1EFFF: F0 9E BF BF
1171	// U+1F000: F0 9F 80 80
1172	let mut bytes = buffer.as_bytes();
1173	'outer: loop {
1174	// TODO: Instead of just validating ASCII using SIMD, use SIMD
1175	// to check for non-ASCII lead bytes, too, to quickly conclude
1176	// that the vector consist entirely of CJK and below-Hebrew
1177	// code points.
1178	// Unfortunately, scripts above Arabic but below CJK share
1179	// lead bytes with RTL.
1180	if let Some((mut byte, mut read)) = validate_ascii(bytes) {
1181	'inner: loop {
1182	// At this point, `byte` is not included in `read`.
1183	if byte < `0xE0` {
1184	if byte >= `0x80` {
1185	// Two-byte
1186	// Adding `unlikely` here improved throughput on
1187	// Russian plain text by 33%!
1188	if unlikely(byte >= `0xD6`) {
1189	if byte == `0xD6` {
1190	let second = bytes[read + `1`];
1191	if second > `0x8F` {
1192	return `true`;
1193	}
1194	} else {
1195	return `true`;
1196	}
1197	}
1198	read += `2`;
1199	} else {
1200	// ASCII: write and go back to SIMD.
1201	read += `1`;
1202	// Intuitively, we should go back to the outer loop only
1203	// if byte is 0x30 or above, so as to avoid trashing on
1204	// ASCII space, comma and period in non-Latin context.
1205	// However, the extra branch seems to cost more than it's
1206	// worth.
1207	bytes = &bytes[read..];
1208	continue 'outer;
1209	}
1210	} else if byte < `0xF0` {
1211	// Three-byte
1212	if unlikely(!in_inclusive_range8(byte, `0xE3`, `0xEE`) && byte != `0xE1`) {
1213	let second = bytes[read + `1`];
1214	if byte == `0xE0` {
1215	if second < `0xA4` {
1216	return `true`;
1217	}
1218	} else if byte == `0xE2` {
1219	let third = bytes[read + `2`];
1220	if second == `0x80` {
1221	if third == `0x8F` \|\| third == `0xAB` \|\| third == `0xAE` {
1222	return `true`;
1223	}
1224	} else if second == `0x81` {
1225	if third == `0xA7` {
1226	return `true`;
1227	}
1228	}
1229	} else {
1230	debug_assert_eq!(byte, `0xEF`);
1231	if in_inclusive_range8(second, `0xAC`, `0xB7`) {
1232	if second == `0xAC` {
1233	let third = bytes[read + `2`];
1234	if third > `0x9C` {
1235	return `true`;
1236	}
1237	} else {
1238	return `true`;
1239	}
1240	} else if in_inclusive_range8(second, `0xB9`, `0xBB`) {
1241	if second == `0xB9` {
1242	let third = bytes[read + `2`];
1243	if third > `0xAF` {
1244	return `true`;
1245	}
1246	} else if second == `0xBB` {
1247	let third = bytes[read + `2`];
1248	if third != `0xBF` {
1249	return `true`;
1250	}
1251	} else {
1252	return `true`;
1253	}
1254	}
1255	}
1256	}
1257	read += `3`;
1258	} else {
1259	// Four-byte
1260	let second = bytes[read + `1`];
1261	if unlikely(byte == `0xF0` && (second == `0x90` \|\| second == `0x9E`)) {
1262	let third = bytes[read + `2`];
1263	if third >= `0xA0` {
1264	return `true`;
1265	}
1266	}
1267	read += `4`;
1268	}
1269	// The comparison is always < or == and never >, but including
1270	// > here to let the compiler assume that < is true if this
1271	// comparison is false.
1272	if read >= bytes.len() {
1273	return `false`;
1274	}
1275	byte = bytes[read];
1276	continue 'inner;
1277	}
1278	} else {
1279	return `false`;
1280	}
1281	}
1282	}
1283
1284	/// Checks whether a UTF-16 buffer contains code points that trigger
1285	/// right-to-left processing.
1286	///
1287	/// The check is done on a Unicode block basis without regard to assigned
1288	/// vs. unassigned code points in the block. Hebrew presentation forms in
1289	/// the Alphabetic Presentation Forms block are treated as if they formed
1290	/// a block on their own (i.e. it treated as right-to-left). Additionally,
1291	/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
1292	/// for. Control characters that are technically bidi controls but do not
1293	/// cause right-to-left behavior without the presence of right-to-left
1294	/// characters or right-to-left controls are not checked for. As a special
1295	/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
1296	///
1297	/// Returns `true` if the input contains an RTL character or an unpaired
1298	/// high surrogate that could be the high half of an RTL character.
1299	/// Returns `false` if the input contains neither RTL characters nor
1300	/// unpaired high surrogates that could be higher halves of RTL characters.
1301	pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
1302	is_utf16_bidi_impl(buffer)
1303	}
1304
/// Tells whether a single scalar value triggers right-to-left processing.
///
/// Classification is per Unicode block and ignores whether a particular code
/// point within a block is assigned. The Hebrew presentation forms in the
/// Alphabetic Presentation Forms block are handled as if they were a block of
/// their own (i.e. they are treated as right-to-left). The four RIGHT-TO-LEFT
/// FOO controls in General Punctuation are detected as well. Controls that
/// are formally bidi controls but that cannot cause right-to-left behavior
/// unless right-to-left characters or right-to-left controls are also present
/// are not detected. As a special case, U+FEFF is excluded from Arabic
/// Presentation Forms-B.
#[inline(always)]
pub fn is_char_bidi(c: char) -> bool {
    // Controls:
    // Every control with RIGHT-TO-LEFT in its name in
    // https://www.unicode.org/charts/PDF/U2000.pdf
    // U+200F RLM
    // U+202B RLE
    // U+202E RLO
    // U+2067 RLI
    //
    // BMP RTL:
    // https://www.unicode.org/roadmaps/bmp/
    // U+0590...U+08FF
    // U+FB1D...U+FDFF Hebrew presentation forms and
    //                 Arabic Presentation Forms A
    // U+FE70...U+FEFE Arabic Presentation Forms B (excl. BOM)
    //
    // Supplementary RTL:
    // https://www.unicode.org/roadmaps/smp/
    // U+10800...U+10FFF (Lead surrogate U+D802 or U+D803)
    // U+1E800...U+1EFFF (Lead surrogate U+D83A or U+D83B)
    let code_point = u32::from(c);
    if code_point < 0x0590 {
        // Below Hebrew: the common case, answered first.
        return false;
    }
    match code_point {
        // Hebrew through Arabic Extended-A
        0x0590..=0x08FF => true,
        // RLM, RLE, RLO and RLI
        0x200F | 0x202B | 0x202E | 0x2067 => true,
        // Hebrew presentation forms and Arabic Presentation Forms A;
        // Arabic Presentation Forms B without the BOM
        0xFB1D..=0xFDFF | 0xFE70..=0xFEFE => true,
        // The two astral RTL ranges
        0x10800..=0x10FFF | 0x1E800..=0x1EFFF => true,
        // Everything in between and above (emoji is up there)
        _ => false,
    }
}
1372
/// Tells whether a single UTF-16 code unit triggers right-to-left processing.
///
/// Classification is per Unicode block and ignores whether a particular code
/// point within a block is assigned. The Hebrew presentation forms in the
/// Alphabetic Presentation Forms block are handled as if they were a block of
/// their own (i.e. they are treated as right-to-left). The four RIGHT-TO-LEFT
/// FOO controls in General Punctuation are detected as well. Controls that
/// are formally bidi controls but that cannot cause right-to-left behavior
/// unless right-to-left characters or right-to-left controls are also present
/// are not detected. As a special case, U+FEFF is excluded from Arabic
/// Presentation Forms-B.
///
/// The supplementary-plane right-to-left blocks can be recognized from the
/// high surrogate alone, without looking at the low surrogate. This function
/// therefore answers `true` for those high surrogates, which makes it usable
/// on supplementary-plane text without first decoding surrogate pairs into
/// scalar values. As a consequence, such a high surrogate is reported as
/// right-to-left even when it is actually unpaired.
#[inline(always)]
pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
    if u < 0x0590 {
        // Below Hebrew: the common case, answered first.
        return false;
    }
    match u {
        // Hebrew through Arabic Extended-A
        0x0590..=0x08FF => true,
        // RLM, RLE, RLO and RLI
        0x200F | 0x202B | 0x202E | 0x2067 => true,
        // High surrogates of the two astral RTL ranges
        0xD802 | 0xD803 | 0xD83A | 0xD83B => true,
        // Hebrew presentation forms and Arabic Presentation Forms A;
        // Arabic Presentation Forms B without the BOM
        0xFB1D..=0xFDFF | 0xFE70..=0xFEFE => true,
        // Everything in between, the remaining surrogates (emoji is there)
        // and everything above
        _ => false,
    }
}
1424
1425	/// Checks whether a potentially invalid UTF-8 buffer contains code points
1426	/// that trigger right-to-left processing or is all-Latin1.
1427	///
1428	/// Possibly more efficient than performing the checks separately.
1429	///
1430	/// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`.
1431	/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return
1432	/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
1433	pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi {
1434	if let Some(offset: usize) = is_utf8_latin1_impl(buffer) {
1435	if is_utf8_bidi(&buffer[offset..]) {
1436	Latin1Bidi::Bidi
1437	} else {
1438	Latin1Bidi::LeftToRight
1439	}
1440	} else {
1441	Latin1Bidi::Latin1
1442	}
1443	}
1444
1445	/// Checks whether a valid UTF-8 buffer contains code points
1446	/// that trigger right-to-left processing or is all-Latin1.
1447	///
1448	/// Possibly more efficient than performing the checks separately.
1449	///
1450	/// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`.
1451	/// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return
1452	/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
1453	pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi {
1454	// The transition from the latin1 check to the bidi check isn't
1455	// optimal but not tweaking it to perfection today.
1456	if let Some(offset: usize) = is_str_latin1_impl(buffer) {
1457	if is_str_bidi(&buffer[offset..]) {
1458	Latin1Bidi::Bidi
1459	} else {
1460	Latin1Bidi::LeftToRight
1461	}
1462	} else {
1463	Latin1Bidi::Latin1
1464	}
1465	}
1466
1467	/// Checks whether a potentially invalid UTF-16 buffer contains code points
1468	/// that trigger right-to-left processing or is all-Latin1.
1469	///
1470	/// Possibly more efficient than performing the checks separately.
1471	///
1472	/// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`.
1473	/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return
1474	/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
1475	pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi {
1476	check_utf16_for_latin1_and_bidi_impl(buffer)
1477	}
1478
1479	/// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced
1480	/// with the REPLACEMENT CHARACTER.
1481	///
1482	/// The length of the destination buffer must be at least the length of the
1483	/// source buffer _plus one_.
1484	///
1485	/// Returns the number of `u16`s written.
1486	///
1487	/// # Panics
1488	///
1489	/// Panics if the destination buffer is shorter than stated above.
1490	pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
1491	// TODO: Can the requirement for dst to be at least one unit longer
1492	// be eliminated?
1493	assert!(dst.len() > src.len());
1494	let mut decoder = Utf8Decoder::new_inner();
1495	let mut total_read = `0usize`;
1496	let mut total_written = `0usize`;
1497	loop {
1498	let (result, read, written) =
1499	decoder.decode_to_utf16_raw(&src[total_read..], &mut dst[total_written..], `true`);
1500	total_read += read;
1501	total_written += written;
1502	match result {
1503	DecoderResult::InputEmpty => {
1504	return total_written;
1505	}
1506	DecoderResult::OutputFull => {
1507	unreachable!("The assert at the top of the function should have caught this.");
1508	}
1509	DecoderResult::Malformed(_, _) => {
1510	// There should always be space for the U+FFFD, because
1511	// otherwise we'd have gotten OutputFull already.
1512	dst[total_written] = `0xFFFD`;
1513	total_written += `1`;
1514	}
1515	}
1516	}
1517	}
1518
1519	/// Converts valid UTF-8 to valid UTF-16.
1520	///
1521	/// The length of the destination buffer must be at least the length of the
1522	/// source buffer.
1523	///
1524	/// Returns the number of `u16`s written.
1525	///
1526	/// # Panics
1527	///
1528	/// Panics if the destination buffer is shorter than stated above.
1529	pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
1530	assert!(
1531	dst.len() >= src.len(),
1532	"Destination must not be shorter than the source."
1533	);
1534	let bytes = src.as_bytes();
1535	let mut read = `0`;
1536	let mut written = `0`;
1537	'outer: loop {
1538	let mut byte = {
1539	let src_remaining = &bytes[read..];
1540	let dst_remaining = &mut dst[written..];
1541	let length = src_remaining.len();
1542	match unsafe {
1543	ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1544	} {
1545	None => {
1546	written += length;
1547	return written;
1548	}
1549	Some((non_ascii, consumed)) => {
1550	read += consumed;
1551	written += consumed;
1552	non_ascii
1553	}
1554	}
1555	};
1556	'inner: loop {
1557	// At this point, `byte` is not included in `read`.
1558	if byte < `0xE0` {
1559	if byte >= `0x80` {
1560	// Two-byte
1561	let second = unsafe { *(bytes.get_unchecked(read + `1`)) };
1562	let point = ((u16::from(byte) & `0x1F`) << `6`) \| (u16::from(second) & `0x3F`);
1563	unsafe { *(dst.get_unchecked_mut(written)) = point };
1564	read += `2`;
1565	written += `1`;
1566	} else {
1567	// ASCII: write and go back to SIMD.
1568	unsafe { (dst.get_unchecked_mut(written)) = u16*::from(byte) };
1569	read += `1`;
1570	written += `1`;
1571	// Intuitively, we should go back to the outer loop only
1572	// if byte is 0x30 or above, so as to avoid trashing on
1573	// ASCII space, comma and period in non-Latin context.
1574	// However, the extra branch seems to cost more than it's
1575	// worth.
1576	continue 'outer;
1577	}
1578	} else if byte < `0xF0` {
1579	// Three-byte
1580	let second = unsafe { *(bytes.get_unchecked(read + `1`)) };
1581	let third = unsafe { *(bytes.get_unchecked(read + `2`)) };
1582	let point = ((u16::from(byte) & `0xF`) << `12`)
1583	\| ((u16::from(second) & `0x3F`) << `6`)
1584	\| (u16::from(third) & `0x3F`);
1585	unsafe { *(dst.get_unchecked_mut(written)) = point };
1586	read += `3`;
1587	written += `1`;
1588	} else {
1589	// Four-byte
1590	let second = unsafe { *(bytes.get_unchecked(read + `1`)) };
1591	let third = unsafe { *(bytes.get_unchecked(read + `2`)) };
1592	let fourth = unsafe { *(bytes.get_unchecked(read + `3`)) };
1593	let point = ((u32::from(byte) & `0x7`) << `18`)
1594	\| ((u32::from(second) & `0x3F`) << `12`)
1595	\| ((u32::from(third) & `0x3F`) << `6`)
1596	\| (u32::from(fourth) & `0x3F`);
1597	unsafe { (dst.get_unchecked_mut(written)) = (`0xD7C0` + (point >> `10`)) as u16* };
1598	unsafe {
1599	(dst.get_unchecked_mut(written + `1`)) = (`0xDC00` + (point & `0x3FF`)) as u16*
1600	};
1601	read += `4`;
1602	written += `2`;
1603	}
1604	// The comparison is always < or == and never >, but including
1605	// > here to let the compiler assume that < is true if this
1606	// comparison is false.
1607	if read >= src.len() {
1608	return written;
1609	}
1610	byte = bytes[read];
1611	continue 'inner;
1612	}
1613	}
1614	}
1615
1616	/// Converts potentially-invalid UTF-8 to valid UTF-16 signaling on error.
1617	///
1618	/// The length of the destination buffer must be at least the length of the
1619	/// source buffer.
1620	///
1621	/// Returns the number of `u16`s written or `None` if the input was invalid.
1622	///
1623	/// When the input was invalid, some output may have been written.
1624	///
1625	/// # Panics
1626	///
1627	/// Panics if the destination buffer is shorter than stated above.
1628	pub fn convert_utf8_to_utf16_without_replacement(src: &[u8], dst: &mut [u16]) -> Option<usize> {
1629	assert!(
1630	dst.len() >= src.len(),
1631	"Destination must not be shorter than the source."
1632	);
1633	let (read: usize, written: usize) = convert_utf8_to_utf16_up_to_invalid(src, dst);
1634	if read == src.len() {
1635	return Some(written);
1636	}
1637	None
1638	}
1639
1640	/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
1641	/// with the REPLACEMENT CHARACTER with potentially insufficient output
1642	/// space.
1643	///
1644	/// Returns the number of code units read and the number of bytes written.
1645	///
1646	/// Guarantees that the bytes in the destination beyond the number of
1647	/// bytes claimed as written by the second item of the return tuple
1648	/// are left unmodified.
1649	///
1650	/// Not all code units are read if there isn't enough output space.
1651	///
1652	/// Note that this method isn't designed for general streamability but for
1653	/// not allocating memory for the worst case up front. Specifically,
1654	/// if the input starts with or ends with an unpaired surrogate, those are
1655	/// replaced with the REPLACEMENT CHARACTER.
1656	///
1657	/// Matches the semantics of `TextEncoder.encodeInto()` from the
1658	/// Encoding Standard.
1659	///
1660	/// # Safety
1661	///
1662	/// If you want to convert into a `&mut str`, use
1663	/// `convert_utf16_to_str_partial()` instead of using this function
1664	/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
1665	#[inline(always)]
1666	pub fn convert_utf16_to_utf8_partial(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
1667	// The two functions called below are marked `inline(never)` to make
1668	// transitions from the hot part (first function) into the cold part
1669	// (second function) go through a return and another call to discouge
1670	// the CPU from speculating from the hot code into the cold code.
1671	// Letting the transitions be mere intra-function jumps, even to
1672	// basic blocks out-of-lined to the end of the function would wipe
1673	// away a quarter of Arabic encode performance on Haswell!
1674	let (read: usize, written: usize) = convert_utf16_to_utf8_partial_inner(src, dst);
1675	if likely(read == src.len()) {
1676	return (read, written);
1677	}
1678	let (tail_read: usize, tail_written: usize) =
1679	convert_utf16_to_utf8_partial_tail(&src[read..], &mut dst[written..]);
1680	(read + tail_read, written + tail_written)
1681	}
1682
1683	/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
1684	/// with the REPLACEMENT CHARACTER.
1685	///
1686	/// The length of the destination buffer must be at least the length of the
1687	/// source buffer times three.
1688	///
1689	/// Returns the number of bytes written.
1690	///
1691	/// # Panics
1692	///
1693	/// Panics if the destination buffer is shorter than stated above.
1694	///
1695	/// # Safety
1696	///
1697	/// If you want to convert into a `&mut str`, use `convert_utf16_to_str()`
1698	/// instead of using this function together with the `unsafe` method
1699	/// `as_bytes_mut()` on `&mut str`.
1700	#[inline(always)]
1701	pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
1702	assert!(dst.len() >= src.len() * `3`);
1703	let (read: usize, written: usize) = convert_utf16_to_utf8_partial(src, dst);
1704	debug_assert_eq!(read, src.len());
1705	written
1706	}
1707
1708	/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
1709	/// with the REPLACEMENT CHARACTER such that the validity of the output is
1710	/// signaled using the Rust type system with potentially insufficient output
1711	/// space.
1712	///
1713	/// Returns the number of code units read and the number of bytes written.
1714	///
1715	/// Not all code units are read if there isn't enough output space.
1716	///
1717	/// Note that this method isn't designed for general streamability but for
1718	/// not allocating memory for the worst case up front. Specifically,
1719	/// if the input starts with or ends with an unpaired surrogate, those are
1720	/// replaced with the REPLACEMENT CHARACTER.
1721	pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) {
1722	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1723	let (read: usize, written: usize) = convert_utf16_to_utf8_partial(src, dst:bytes);
1724	let len: usize = bytes.len();
1725	let mut trail: usize = written;
1726	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
1727	bytes[trail] = `0`;
1728	trail += `1`;
1729	}
1730	(read, written)
1731	}
1732
1733	/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
1734	/// with the REPLACEMENT CHARACTER such that the validity of the output is
1735	/// signaled using the Rust type system.
1736	///
1737	/// The length of the destination buffer must be at least the length of the
1738	/// source buffer times three.
1739	///
1740	/// Returns the number of bytes written.
1741	///
1742	/// # Panics
1743	///
1744	/// Panics if the destination buffer is shorter than stated above.
1745	#[inline(always)]
1746	pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
1747	assert!(dst.len() >= src.len() * `3`);
1748	let (read: usize, written: usize) = convert_utf16_to_str_partial(src, dst);
1749	debug_assert_eq!(read, src.len());
1750	written
1751	}
1752
1753	/// Converts bytes whose unsigned value is interpreted as Unicode code point
1754	/// (i.e. U+0000 to U+00FF, inclusive) to UTF-16.
1755	///
1756	/// The length of the destination buffer must be at least the length of the
1757	/// source buffer.
1758	///
1759	/// The number of `u16`s written equals the length of the source buffer.
1760	///
1761	/// # Panics
1762	///
1763	/// Panics if the destination buffer is shorter than stated above.
1764	pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
1765	assert!(
1766	dst.len() >= src.len(),
1767	"Destination must not be shorter than the source."
1768	);
1769	// TODO: On aarch64, the safe version autovectorizes to the same unpacking
1770	// instructions and this code, but, yet, the autovectorized version is
1771	// faster.
1772	unsafe {
1773	unpack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1774	}
1775	}
1776
1777	/// Converts bytes whose unsigned value is interpreted as Unicode code point
1778	/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 with potentially insufficient
1779	/// output space.
1780	///
1781	/// Returns the number of bytes read and the number of bytes written.
1782	///
1783	/// If the output isn't large enough, not all input is consumed.
1784	///
1785	/// # Safety
1786	///
1787	/// If you want to convert into a `&mut str`, use
1788	/// `convert_utf16_to_str_partial()` instead of using this function
1789	/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
1790	pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) {
1791	let src_len = src.len();
1792	let src_ptr = src.as_ptr();
1793	let dst_ptr = dst.as_mut_ptr();
1794	let dst_len = dst.len();
1795	let mut total_read = `0usize`;
1796	let mut total_written = `0usize`;
1797	loop {
1798	// src can't advance more than dst
1799	let src_left = src_len - total_read;
1800	let dst_left = dst_len - total_written;
1801	let min_left = ::core::cmp::min(src_left, dst_left);
1802	if let Some((non_ascii, consumed)) = unsafe {
1803	ascii_to_ascii(
1804	src_ptr.add(total_read),
1805	dst_ptr.add(total_written),
1806	min_left,
1807	)
1808	} {
1809	total_read += consumed;
1810	total_written += consumed;
1811	if total_written.checked_add(`2`).unwrap() > dst_len {
1812	return (total_read, total_written);
1813	}
1814
1815	total_read += `1`; // consume `non_ascii`
1816
1817	dst[total_written] = (non_ascii >> `6`) \| `0xC0`;
1818	total_written += `1`;
1819	dst[total_written] = (non_ascii & `0x3F`) \| `0x80`;
1820	total_written += `1`;
1821	continue;
1822	}
1823	return (total_read + min_left, total_written + min_left);
1824	}
1825	}
1826
1827	/// Converts bytes whose unsigned value is interpreted as Unicode code point
1828	/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
1829	///
1830	/// The length of the destination buffer must be at least the length of the
1831	/// source buffer times two.
1832	///
1833	/// Returns the number of bytes written.
1834	///
1835	/// # Panics
1836	///
1837	/// Panics if the destination buffer is shorter than stated above.
1838	///
1839	/// # Safety
1840	///
1841	/// Note that this function may write garbage beyond the number of bytes
1842	/// indicated by the return value, so using a `&mut str` interpreted as
1843	/// `&mut [u8]` as the destination is not safe. If you want to convert into
1844	/// a `&mut str`, use `convert_utf16_to_str()` instead of this function.
1845	#[inline]
1846	pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
1847	assert!(
1848	dst.len() >= src.len() * `2`,
1849	"Destination must not be shorter than the source times two."
1850	);
1851	let (read: usize, written: usize) = convert_latin1_to_utf8_partial(src, dst);
1852	debug_assert_eq!(read, src.len());
1853	written
1854	}
1855
1856	/// Converts bytes whose unsigned value is interpreted as Unicode code point
1857	/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 such that the validity of the
1858	/// output is signaled using the Rust type system with potentially insufficient
1859	/// output space.
1860	///
1861	/// Returns the number of bytes read and the number of bytes written.
1862	///
1863	/// If the output isn't large enough, not all input is consumed.
1864	#[inline]
1865	pub fn convert_latin1_to_str_partial(src: &[u8], dst: &mut str) -> (usize, usize) {
1866	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1867	let (read: usize, written: usize) = convert_latin1_to_utf8_partial(src, dst:bytes);
1868	let len: usize = bytes.len();
1869	let mut trail: usize = written;
1870	let max: usize = ::core::cmp::min(v1:len, v2:trail + MAX_STRIDE_SIZE);
1871	while trail < max {
1872	bytes[trail] = `0`;
1873	trail += `1`;
1874	}
1875	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
1876	bytes[trail] = `0`;
1877	trail += `1`;
1878	}
1879	(read, written)
1880	}
1881
1882	/// Converts bytes whose unsigned value is interpreted as Unicode code point
1883	/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 such that the validity of the
1884	/// output is signaled using the Rust type system.
1885	///
1886	/// The length of the destination buffer must be at least the length of the
1887	/// source buffer times two.
1888	///
1889	/// Returns the number of bytes written.
1890	///
1891	/// # Panics
1892	///
1893	/// Panics if the destination buffer is shorter than stated above.
1894	#[inline]
1895	pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
1896	assert!(
1897	dst.len() >= src.len() * `2`,
1898	"Destination must not be shorter than the source times two."
1899	);
1900	let (read: usize, written: usize) = convert_latin1_to_str_partial(src, dst);
1901	debug_assert_eq!(read, src.len());
1902	written
1903	}
1904
1905	/// If the input is valid UTF-8 representing only Unicode code points from
1906	/// U+0000 to U+00FF, inclusive, converts the input into output that
1907	/// represents the value of each code point as the unsigned byte value of
1908	/// each output byte.
1909	///
1910	/// If the input does not fulfill the condition stated above, this function
1911	/// panics if debug assertions are enabled (and fuzzing isn't) and otherwise
1912	/// does something that is memory-safe without any promises about any
1913	/// properties of the output. In particular, callers shouldn't assume the
1914	/// output to be the same across crate versions or CPU architectures and
1915	/// should not assume that non-ASCII input can't map to ASCII output.
1916	///
1917	/// The length of the destination buffer must be at least the length of the
1918	/// source buffer.
1919	///
1920	/// Returns the number of bytes written.
1921	///
1922	/// # Panics
1923	///
1924	/// Panics if the destination buffer is shorter than stated above.
1925	///
1926	/// If debug assertions are enabled (and not fuzzing) and the input is
1927	/// not in the range U+0000 to U+00FF, inclusive.
1928	pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
1929	assert!(
1930	dst.len() >= src.len(),
1931	"Destination must not be shorter than the source."
1932	);
1933	non_fuzz_debug_assert!(is_utf8_latin1(src));
1934	let src_len = src.len();
1935	let src_ptr = src.as_ptr();
1936	let dst_ptr = dst.as_mut_ptr();
1937	let mut total_read = `0usize`;
1938	let mut total_written = `0usize`;
1939	loop {
1940	// dst can't advance more than src
1941	let src_left = src_len - total_read;
1942	if let Some((non_ascii, consumed)) = unsafe {
1943	ascii_to_ascii(
1944	src_ptr.add(total_read),
1945	dst_ptr.add(total_written),
1946	src_left,
1947	)
1948	} {
1949	total_read += consumed + `1`;
1950	total_written += consumed;
1951
1952	if total_read == src_len {
1953	return total_written;
1954	}
1955
1956	let trail = src[total_read];
1957	total_read += `1`;
1958
1959	dst[total_written] = ((non_ascii & `0x1F`) << `6`) \| (trail & `0x3F`);
1960	total_written += `1`;
1961	continue;
1962	}
1963	return total_written + src_left;
1964	}
1965	}
1966
1967	/// If the input is valid UTF-16 representing only Unicode code points from
1968	/// U+0000 to U+00FF, inclusive, converts the input into output that
1969	/// represents the value of each code point as the unsigned byte value of
1970	/// each output byte.
1971	///
1972	/// If the input does not fulfill the condition stated above, does something
1973	/// that is memory-safe without any promises about any properties of the
1974	/// output and will probably assert in debug builds in future versions.
1975	/// In particular, callers shouldn't assume the output to be the same across
1976	/// crate versions or CPU architectures and should not assume that non-ASCII
1977	/// input can't map to ASCII output.
1978	///
1979	/// The length of the destination buffer must be at least the length of the
1980	/// source buffer.
1981	///
1982	/// The number of bytes written equals the length of the source buffer.
1983	///
1984	/// # Panics
1985	///
1986	/// Panics if the destination buffer is shorter than stated above.
1987	///
1988	/// (Probably in future versions if debug assertions are enabled (and not
1989	/// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.)
1990	pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
1991	assert!(
1992	dst.len() >= src.len(),
1993	"Destination must not be shorter than the source."
1994	);
1995	// non_fuzz_debug_assert!(is_utf16_latin1(src));
1996	unsafe {
1997	pack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1998	}
1999	}
2000
/// Converts bytes whose unsigned value is interpreted as Unicode code point
/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
///
/// Borrows if input is ASCII-only. Performs a single heap allocation
/// otherwise.
///
/// Only available if the `alloc` feature is enabled (enabled by default).
#[cfg(feature = "alloc")]
pub fn decode_latin1<'a>(bytes: &'a [u8]) -> Cow<'a, str> {
    let up_to = ascii_valid_up_to(bytes);
    // >= makes later things optimize better than ==
    if up_to >= bytes.len() {
        debug_assert_eq!(up_to, bytes.len());
        // SAFETY: the ASCII prefix covers the whole input, and ASCII is
        // valid UTF-8.
        let s: &str = unsafe { ::core::str::from_utf8_unchecked(bytes) };
        return Cow::Borrowed(s);
    }
    let (head, tail) = bytes.split_at(up_to);
    // The ASCII head is copied as-is; the tail is sized at two output bytes
    // per input byte, as `convert_latin1_to_utf8` is given here.
    let capacity = head.len() + tail.len() * 2;
    let mut vec: Vec<u8> = Vec::with_capacity(capacity);
    vec.extend_from_slice(head);
    // Zero-fill the conversion area instead of `set_len` over uninitialized
    // memory: `Vec::set_len` requires the exposed elements to be
    // initialized. No reallocation happens because `capacity` was reserved
    // up front.
    vec.resize(capacity, 0);
    let written = convert_latin1_to_utf8(tail, &mut vec[up_to..]);
    vec.truncate(up_to + written);
    // SAFETY: the head is ASCII and the remainder is the first `written`
    // bytes produced by `convert_latin1_to_utf8`, which is relied upon to
    // emit valid UTF-8.
    Cow::Owned(unsafe { String::from_utf8_unchecked(vec) })
}
2028
/// If the input is valid UTF-8 representing only Unicode code points from
/// U+0000 to U+00FF, inclusive, converts the input into output that
/// represents the value of each code point as the unsigned byte value of
/// each output byte.
///
/// If the input does not fulfill the condition stated above, this function
/// panics if debug assertions are enabled (and fuzzing isn't) and otherwise
/// does something that is memory-safe without any promises about any
/// properties of the output. In particular, callers shouldn't assume the
/// output to be the same across crate versions or CPU architectures and
/// should not assume that non-ASCII input can't map to ASCII output.
///
/// Borrows if input is ASCII-only. Performs a single heap allocation
/// otherwise.
///
/// Only available if the `alloc` feature is enabled (enabled by default).
#[cfg(feature = "alloc")]
pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> {
    let bytes = string.as_bytes();
    let up_to = ascii_valid_up_to(bytes);
    // >= makes later things optimize better than ==
    if up_to >= bytes.len() {
        debug_assert_eq!(up_to, bytes.len());
        return Cow::Borrowed(bytes);
    }
    let (head, tail) = bytes.split_at(up_to);
    // The Latin1 output is never longer than the UTF-8 input.
    let capacity = bytes.len();
    let mut vec: Vec<u8> = Vec::with_capacity(capacity);
    vec.extend_from_slice(head);
    // Zero-fill the conversion area instead of `set_len` over uninitialized
    // memory: `Vec::set_len` requires the exposed elements to be
    // initialized. No reallocation happens because `capacity` was reserved
    // up front.
    vec.resize(capacity, 0);
    let written = convert_utf8_to_latin1_lossy(tail, &mut vec[up_to..]);
    vec.truncate(up_to + written);
    Cow::Owned(vec)
}
2065
2066	/// Returns the index of the first unpaired surrogate or, if the input is
2067	/// valid UTF-16 in its entirety, the length of the input.
2068	pub fn utf16_valid_up_to(buffer: &[u16]) -> usize {
2069	utf16_valid_up_to_impl(buffer)
2070	}
2071
2072	/// Returns the index of first byte that starts an invalid byte
2073	/// sequence or a non-Latin1 byte sequence, or the length of the
2074	/// string if there are neither.
2075	pub fn utf8_latin1_up_to(buffer: &[u8]) -> usize {
2076	is_utf8_latin1_impl(buffer).unwrap_or(default:buffer.len())
2077	}
2078
2079	/// Returns the index of first byte that starts a non-Latin1 byte
2080	/// sequence, or the length of the string if there are none.
2081	pub fn str_latin1_up_to(buffer: &str) -> usize {
2082	is_str_latin1_impl(buffer).unwrap_or_else(\|\| buffer.len())
2083	}
2084
2085	/// Replaces unpaired surrogates in the input with the REPLACEMENT CHARACTER.
2086	#[inline]
2087	pub fn ensure_utf16_validity(buffer: &mut [u16]) {
2088	let mut offset: usize = `0`;
2089	loop {
2090	offset += utf16_valid_up_to(&buffer[offset..]);
2091	if offset == buffer.len() {
2092	return;
2093	}
2094	buffer[offset] = `0xFFFD`;
2095	offset += `1`;
2096	}
2097	}
2098
2099	/// Copies ASCII from source to destination up to the first non-ASCII byte
2100	/// (or the end of the input if it is ASCII in its entirety).
2101	///
2102	/// The length of the destination buffer must be at least the length of the
2103	/// source buffer.
2104	///
2105	/// Returns the number of bytes written.
2106	///
2107	/// # Panics
2108	///
2109	/// Panics if the destination buffer is shorter than stated above.
2110	pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize {
2111	assert!(
2112	dst.len() >= src.len(),
2113	"Destination must not be shorter than the source."
2114	);
2115	if let Some((_, consumed: usize)) =
2116	unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2117	{
2118	consumed
2119	} else {
2120	src.len()
2121	}
2122	}
2123
2124	/// Copies ASCII from source to destination zero-extending it to UTF-16 up to
2125	/// the first non-ASCII byte (or the end of the input if it is ASCII in its
2126	/// entirety).
2127	///
2128	/// The length of the destination buffer must be at least the length of the
2129	/// source buffer.
2130	///
2131	/// Returns the number of `u16`s written.
2132	///
2133	/// # Panics
2134	///
2135	/// Panics if the destination buffer is shorter than stated above.
2136	pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize {
2137	assert!(
2138	dst.len() >= src.len(),
2139	"Destination must not be shorter than the source."
2140	);
2141	if let Some((_, consumed: usize)) =
2142	unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2143	{
2144	consumed
2145	} else {
2146	src.len()
2147	}
2148	}
2149
2150	/// Copies Basic Latin from source to destination narrowing it to ASCII up to
2151	/// the first non-Basic Latin code unit (or the end of the input if it is
2152	/// Basic Latin in its entirety).
2153	///
2154	/// The length of the destination buffer must be at least the length of the
2155	/// source buffer.
2156	///
2157	/// Returns the number of bytes written.
2158	///
2159	/// # Panics
2160	///
2161	/// Panics if the destination buffer is shorter than stated above.
2162	pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize {
2163	assert!(
2164	dst.len() >= src.len(),
2165	"Destination must not be shorter than the source."
2166	);
2167	if let Some((_, consumed: usize)) =
2168	unsafe { basic_latin_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2169	{
2170	consumed
2171	} else {
2172	src.len()
2173	}
2174	}
2175
2176	// Any copyright to the test code below this comment is dedicated to the
2177	// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
2178
2179	#[cfg(all(test, feature = "alloc"))]
2180	mod tests {
2181	use super::*;
2182
#[test]
fn test_is_ascii_success() {
    // Byte values 0x00..=0x7F in ascending order.
    let ascii: Vec<u8> = (0..128u8).collect();
    // Every suffix of the buffer must be recognized as ASCII.
    for start in 0..ascii.len() {
        assert!(is_ascii(&ascii[start..]));
    }
}
2194
#[test]
fn test_is_ascii_fail() {
    // Starts out as pure ASCII: byte values 0x00..=0x7F.
    let mut bytes: Vec<u8> = (0..128u8).collect();
    for start in 0..bytes.len() {
        let suffix = &mut bytes[start..];
        // Poison one more position per step (overwrites are never undone);
        // each suffix must be rejected after every overwrite.
        for pos in 0..suffix.len() {
            suffix[pos] = 0xA0;
            assert!(!is_ascii(suffix));
        }
    }
}
2210
#[test]
fn test_is_basic_latin_success() {
    // Code units 0x0000..=0x007F in ascending order.
    let units: Vec<u16> = (0..128u16).collect();
    // Every suffix of the buffer must be recognized as Basic Latin.
    for start in 0..units.len() {
        assert!(is_basic_latin(&units[start..]));
    }
}
2222
#[test]
fn test_is_basic_latin_fail() {
    // Starts out as pure Basic Latin: code units 0x0000..=0x007F.
    let mut units: Vec<u16> = (0..128u16).collect();
    for start in 0..units.len() {
        let suffix = &mut units[start..];
        // Poison one more position per step (overwrites are never undone);
        // each suffix must be rejected after every overwrite.
        for pos in 0..suffix.len() {
            suffix[pos] = 0xA0;
            assert!(!is_basic_latin(suffix));
        }
    }
}
2238
#[test]
fn test_is_utf16_latin1_success() {
    // Code units 0x0000..=0x00FF in ascending order.
    let units: Vec<u16> = (0..256u16).collect();
    // Both the boolean check and the combined Latin1/bidi check must accept
    // every suffix.
    for start in 0..units.len() {
        let suffix = &units[start..];
        assert!(is_utf16_latin1(suffix));
        assert_eq!(check_utf16_for_latin1_and_bidi(suffix), Latin1Bidi::Latin1);
    }
}
2254
#[test]
fn test_is_utf16_latin1_fail() {
    // Miri is too slow
    let len: usize = if cfg!(miri) { 64 } else { 256 };
    // Starts out within Latin1: code unit `i` at index `i`.
    let mut units: Vec<u16> = (0..len).map(|value| value as u16).collect();
    for start in 0..units.len() {
        let suffix = &mut units[start..];
        // Push one more position out of the Latin1 range per step
        // (overwrites are never undone); both checks must then reject.
        for pos in 0..suffix.len() {
            suffix[pos] = 0x100 + pos as u16;
            assert!(!is_utf16_latin1(suffix));
            assert_ne!(check_utf16_for_latin1_and_bidi(suffix), Latin1Bidi::Latin1);
        }
    }
}
2272
#[test]
fn test_is_str_latin1_success() {
    // Miri is too slow
    let len: usize = if cfg!(miri) { 64 } else { 256 };
    // Code unit `i` at index `i`, so everything stays within Latin1.
    let units: Vec<u16> = (0..len).map(|value| value as u16).collect();
    for start in 0..units.len() {
        // Re-encode each suffix as a `String` and check it as `&str`.
        let text = String::from_utf16(&units[start..]).unwrap();
        assert!(is_str_latin1(&text[..]));
        assert_eq!(check_str_for_latin1_and_bidi(&text[..]), Latin1Bidi::Latin1);
    }
}
2287
#[test]
fn test_is_str_latin1_fail() {
    // Miri is too slow
    let len: usize = if cfg!(miri) { 32 } else { 256 };
    // Starts out within Latin1: code unit `i` at index `i`.
    let mut units: Vec<u16> = (0..len).map(|value| value as u16).collect();
    for start in 0..units.len() {
        let suffix = &mut units[start..];
        // Push one more position out of the Latin1 range per step
        // (overwrites are never undone), then check the suffix as `&str`.
        for pos in 0..suffix.len() {
            suffix[pos] = 0x100 + pos as u16;
            let text = String::from_utf16(suffix).unwrap();
            assert!(!is_str_latin1(&text[..]));
            assert_ne!(check_str_for_latin1_and_bidi(&text[..]), Latin1Bidi::Latin1);
        }
    }
}
2306
#[test]
fn test_is_utf8_latin1_success() {
    // Miri is too slow
    let len: usize = if cfg!(miri) { 64 } else { 256 };
    // Code unit `i` at index `i`, so everything stays within Latin1.
    let units: Vec<u16> = (0..len).map(|value| value as u16).collect();
    for start in 0..units.len() {
        // Re-encode each suffix as UTF-8 and check the raw bytes.
        let text = String::from_utf16(&units[start..]).unwrap();
        let utf8 = text.as_bytes();
        assert!(is_utf8_latin1(utf8));
        assert_eq!(check_utf8_for_latin1_and_bidi(utf8), Latin1Bidi::Latin1);
    }
}
2324
#[test]
fn test_is_utf8_latin1_fail() {
    // Miri is too slow
    let len: usize = if cfg!(miri) { 32 } else { 256 };
    // Starts out within Latin1: code unit `i` at index `i`.
    let mut units: Vec<u16> = (0..len).map(|value| value as u16).collect();
    for start in 0..units.len() {
        let suffix = &mut units[start..];
        // Push one more position out of the Latin1 range per step
        // (overwrites are never undone), then check the UTF-8 bytes.
        for pos in 0..suffix.len() {
            suffix[pos] = 0x100 + pos as u16;
            let text = String::from_utf16(suffix).unwrap();
            let utf8 = text.as_bytes();
            assert!(!is_utf8_latin1(utf8));
            assert_ne!(check_utf8_for_latin1_and_bidi(utf8), Latin1Bidi::Latin1);
        }
    }
}
2346
#[test]
fn test_is_utf8_latin1_invalid() {
    // Malformed UTF-8 inputs, each with and without a leading ASCII byte.
    let malformed: [&[u8]; 6] = [
        b"\xC3",
        b"a\xC3",
        b"\xFF",
        b"a\xFF",
        b"\xC3\xFF",
        b"a\xC3\xFF",
    ];
    for &bytes in malformed.iter() {
        assert!(!is_utf8_latin1(bytes), "input: {:?}", bytes);
    }
}
2356
#[test]
fn test_convert_utf8_to_utf16() {
    let text = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
    // The destination is one code unit longer than the UTF-8 byte length.
    let mut output = vec![0u16; text.len() + 1];
    let written = convert_utf8_to_utf16(text.as_bytes(), &mut output[..]);
    output.truncate(written);
    // The standard library encoder is the reference.
    let expected: Vec<u16> = text.encode_utf16().collect();
    assert_eq!(output, expected);
}
2367
#[test]
fn test_convert_str_to_utf16() {
    let text = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
    // The destination is exactly as long as the UTF-8 byte length.
    let mut output = vec![0u16; text.len()];
    let written = convert_str_to_utf16(text, &mut output[..]);
    output.truncate(written);
    // The standard library encoder is the reference.
    let expected: Vec<u16> = text.encode_utf16().collect();
    assert_eq!(output, expected);
}
2378
#[test]
fn test_convert_utf16_to_utf8_partial() {
    let expected = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
    let input: Vec<u16> = expected.encode_utf16().collect();
    let mut output = vec![0u8; input.len() * 3 + 1];
    // First pass: only the first 24 bytes of the output are offered, so the
    // partial conversion gets a chance to stop early.
    let (read, written) = convert_utf16_to_utf8_partial(&input[..], &mut output[..24]);
    // Second pass: convert whatever is left into the rest of the output.
    let remainder = convert_utf16_to_utf8(&input[read..], &mut output[written..]);
    output.truncate(written + remainder);
    assert_eq!(output, expected.as_bytes());
}
2390
#[test]
fn test_convert_utf16_to_utf8() {
    let expected = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
    let input: Vec<u16> = expected.encode_utf16().collect();
    // Destination sized at three bytes per code unit plus one.
    let mut output = vec![0u8; input.len() * 3 + 1];
    let written = convert_utf16_to_utf8(&input[..], &mut output[..]);
    output.truncate(written);
    assert_eq!(output, expected.as_bytes());
}
2401
#[test]
fn test_convert_latin1_to_utf16() {
    // Every byte value once, in ascending order ...
    let latin1: Vec<u8> = (0..=255u8).collect();
    // ... and the same values as UTF-16 code units.
    let expected: Vec<u16> = (0..256u16).collect();
    let mut output = vec![0u16; latin1.len()];
    convert_latin1_to_utf16(&latin1[..], &mut output[..]);
    assert_eq!(output, expected);
}
2417
#[test]
fn test_convert_latin1_to_utf8_partial() {
    // Two-byte destination. The repeat expression `[0u8; 2]` is what is
    // meant; the previous `[0u8, 2]` was a list literal holding the values 0
    // and 2 that merely happened to have the same length.
    let mut dst = [0u8; 2];
    let (read, written) = convert_latin1_to_utf8_partial(b"a\xFF", &mut dst[..]);
    // Only 'a' is converted: one input byte read, one output byte written,
    // with 0xFF left unconverted in the single remaining output byte.
    assert_eq!(read, 1);
    assert_eq!(written, 1);
}
2425
#[test]
fn test_convert_latin1_to_utf8() {
    // Every byte value once, in ascending order.
    let latin1: Vec<u8> = (0..=255u8).collect();
    // Reference: the same code points encoded by the standard library.
    let code_units: Vec<u16> = (0..256u16).collect();
    let expected = String::from_utf16(&code_units[..]).unwrap();
    // Destination is twice the source length, as the converter requires.
    let mut output = vec![0u8; latin1.len() * 2];
    let written = convert_latin1_to_utf8(&latin1[..], &mut output[..]);
    output.truncate(written);
    assert_eq!(&output[..], expected.as_bytes());
}
2443
#[test]
fn test_convert_utf8_to_latin1_lossy() {
    // Expected output: every byte value once, in ascending order.
    let expected: Vec<u8> = (0..=255u8).collect();
    // Input: the same code points as UTF-8, produced via UTF-16.
    let code_units: Vec<u16> = (0..256u16).collect();
    let text = String::from_utf16(&code_units[..]).unwrap();
    let mut output = vec![0u8; text.len()];
    let written = convert_utf8_to_latin1_lossy(text.as_bytes(), &mut output[..]);
    output.truncate(written);
    assert_eq!(output, expected);
}
2461
// Only meaningful when the debug assertion inside the converter is active.
#[cfg(all(debug_assertions, not(fuzzing)))]
#[test]
#[should_panic]
fn test_convert_utf8_to_latin1_lossy_panics() {
    // U+0100 is the first code point outside the Latin1 range.
    let out_of_range = "\u{100}";
    let mut output = [0u8; 16];
    let _ = convert_utf8_to_latin1_lossy(out_of_range.as_bytes(), &mut output[..]);
}
2469
#[test]
fn test_convert_utf16_to_latin1_lossy() {
    // Code units 0x0000..=0x00FF ...
    let input: Vec<u16> = (0..256u16).collect();
    // ... must narrow to the byte of the same value.
    let expected: Vec<u8> = (0..=255u8).collect();
    let mut output = vec![0u8; input.len()];
    convert_utf16_to_latin1_lossy(&input[..], &mut output[..]);
    assert_eq!(output, expected);
}
2485
#[test]
// #[should_panic]
fn test_convert_utf16_to_latin1_lossy_panics() {
    // Despite the name, no panic is expected for now: `should_panic` stays
    // commented out, so this only checks that out-of-range input (U+0100)
    // is handled without panicking.
    let out_of_range = [0x0100u16];
    let mut output = [0u8; 16];
    convert_utf16_to_latin1_lossy(&out_of_range, &mut output[..]);
}
2492
#[test]
fn test_utf16_valid_up_to() {
    // Builds a 16-unit buffer: `zeros` zero code units followed by `tail`.
    let padded = |zeros: usize, tail: &[u16]| -> Vec<u16> {
        let mut units = vec![0u16; zeros];
        units.extend_from_slice(tail);
        assert_eq!(units.len(), 16);
        units
    };

    // A properly paired surrogate: the whole buffer is valid.
    let valid = padded(12, &[0x2603, 0xD83D, 0xDCA9, 0x00B6]);
    assert_eq!(utf16_valid_up_to(&valid[..]), 16);

    // High surrogate followed by a non-surrogate.
    let lone_high = padded(13, &[0x2603, 0xD83D, 0x00B6]);
    assert_eq!(utf16_valid_up_to(&lone_high[..]), 14);

    // Low surrogate without a preceding high surrogate.
    let lone_low = padded(13, &[0x2603, 0xDCA9, 0x00B6]);
    assert_eq!(utf16_valid_up_to(&lone_low[..]), 14);

    // High surrogate as the very last code unit.
    let lone_high_at_end = padded(13, &[0x2603, 0x00B6, 0xD83D]);
    assert_eq!(utf16_valid_up_to(&lone_high_at_end[..]), 15);
}
2516
#[test]
fn test_ensure_utf16_validity() {
    // 31 code units, all zero except for the surrogates placed below.
    let mut units = vec![0u16; 31];
    units[1] = 0xD83D; // high surrogate followed by a non-surrogate
    units[5] = 0xD83D; // properly paired ...
    units[6] = 0xDCA9; // ... with this low surrogate
    units[13] = 0xDCA9; // low surrogate without a preceding high surrogate

    // Only the two unpaired surrogates are expected to be replaced.
    let mut expected = units.clone();
    expected[1] = 0xFFFD;
    expected[13] = 0xFFFD;

    ensure_utf16_validity(&mut units[..]);
    assert_eq!(units, expected);
}
2532
#[test]
fn test_is_char_bidi() {
    // Characters that must not be reported as bidi.
    let not_bidi = [
        'a',
        '\u{03B1}',
        '\u{3041}',
        '\u{1F4A9}',
        '\u{FE00}',
        '\u{202C}',
        '\u{FEFF}',
    ];
    for &c in not_bidi.iter() {
        assert!(!is_char_bidi(c), "U+{:04X}", c as u32);
    }

    // Characters that must be reported as bidi.
    let bidi = [
        '\u{0590}',
        '\u{08FF}',
        '\u{061C}',
        '\u{FB50}',
        '\u{FDFF}',
        '\u{FE70}',
        '\u{FEFE}',
        '\u{200F}',
        '\u{202B}',
        '\u{202E}',
        '\u{2067}',
        '\u{10800}',
        '\u{10FFF}',
        '\u{1E800}',
        '\u{1EFFF}',
    ];
    for &c in bidi.iter() {
        assert!(is_char_bidi(c), "U+{:04X}", c as u32);
    }
}
2558
#[test]
fn test_is_utf16_code_unit_bidi() {
    // Code units that must not be reported as bidi.
    let not_bidi: [u16; 7] = [0x0062, 0x03B1, 0x3041, 0xD801, 0xFE00, 0x202C, 0xFEFF];
    for &unit in not_bidi.iter() {
        assert!(!is_utf16_code_unit_bidi(unit), "0x{:04X}", unit);
    }

    // Code units that must be reported as bidi.
    let bidi: [u16; 16] = [
        0x0590, 0x08FF, 0x061C, 0xFB1D, 0xFB50, 0xFDFF, 0xFE70, 0xFEFE, 0x200F, 0x202B, 0x202E,
        0x2067, 0xD802, 0xD803, 0xD83A, 0xD83B,
    ];
    for &unit in bidi.iter() {
        assert!(is_utf16_code_unit_bidi(unit), "0x{:04X}", unit);
    }
}
2585
#[test]
fn test_is_str_bidi() {
    // Each probe character is sandwiched between two copies of this run.
    const PAD: &str = "abcdefghijklmnop";
    let wrap = |c: char| -> String {
        let mut text = String::with_capacity(PAD.len() * 2 + c.len_utf8());
        text.push_str(PAD);
        text.push(c);
        text.push_str(PAD);
        text
    };

    // Characters that must not make the string bidi.
    let not_bidi = [
        'a',
        '\u{03B1}',
        '\u{3041}',
        '\u{1F4A9}',
        '\u{FE00}',
        '\u{202C}',
        '\u{FEFF}',
    ];
    for &c in not_bidi.iter() {
        assert!(!is_str_bidi(&wrap(c)), "U+{:04X}", c as u32);
    }

    // Characters that must make the string bidi.
    let bidi = [
        '\u{0590}',
        '\u{08FF}',
        '\u{061C}',
        '\u{FB50}',
        '\u{FDFF}',
        '\u{FE70}',
        '\u{FEFE}',
        '\u{200F}',
        '\u{202B}',
        '\u{202E}',
        '\u{2067}',
        '\u{10800}',
        '\u{10FFF}',
        '\u{1E800}',
        '\u{1EFFF}',
    ];
    for &c in bidi.iter() {
        assert!(is_str_bidi(&wrap(c)), "U+{:04X}", c as u32);
    }
}
2611
#[test]
fn test_is_utf8_bidi() {
    // Each probe character is sandwiched between two copies of this run.
    const PAD: &str = "abcdefghijklmnop";
    let wrap = |c: char| -> String {
        let mut text = String::with_capacity(PAD.len() * 2 + c.len_utf8());
        text.push_str(PAD);
        text.push(c);
        text.push_str(PAD);
        text
    };

    // Characters that must not make the UTF-8 bytes bidi.
    let not_bidi = [
        'a',
        '\u{03B1}',
        '\u{3041}',
        '\u{1F4A9}',
        '\u{FE00}',
        '\u{202C}',
        '\u{FEFF}',
    ];
    for &c in not_bidi.iter() {
        assert!(!is_utf8_bidi(wrap(c).as_bytes()), "U+{:04X}", c as u32);
    }

    // Characters that must make the UTF-8 bytes bidi.
    let bidi = [
        '\u{0590}',
        '\u{08FF}',
        '\u{061C}',
        '\u{FB50}',
        '\u{FDFF}',
        '\u{FE70}',
        '\u{FEFE}',
        '\u{200F}',
        '\u{202B}',
        '\u{202E}',
        '\u{2067}',
        '\u{10800}',
        '\u{10FFF}',
        '\u{1E800}',
        '\u{1EFFF}',
    ];
    for &c in bidi.iter() {
        assert!(is_utf8_bidi(wrap(c).as_bytes()), "U+{:04X}", c as u32);
    }
}
2681
#[test]
fn test_is_utf16_bidi() {
    // The probe code units are sandwiched between two copies of this run.
    const PAD: [u16; 8] = [0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69];
    let wrap = |middle: &[u16]| -> Vec<u16> {
        let mut units = Vec::with_capacity(PAD.len() * 2 + middle.len());
        units.extend_from_slice(&PAD);
        units.extend_from_slice(middle);
        units.extend_from_slice(&PAD);
        units
    };

    // Code units that must not make the buffer bidi.
    let not_bidi: [u16; 7] = [0x0062, 0x03B1, 0x3041, 0xD801, 0xFE00, 0x202C, 0xFEFF];
    for &unit in not_bidi.iter() {
        assert!(!is_utf16_bidi(&wrap(&[unit])), "0x{:04X}", unit);
    }

    // Code units that must make the buffer bidi.
    let bidi: [u16; 16] = [
        0x0590, 0x08FF, 0x061C, 0xFB1D, 0xFB50, 0xFDFF, 0xFE70, 0xFEFE, 0x200F, 0x202B, 0x202E,
        0x2067, 0xD802, 0xD803, 0xD83A, 0xD83B,
    ];
    for &unit in bidi.iter() {
        assert!(is_utf16_bidi(&wrap(&[unit])), "0x{:04X}", unit);
    }

    // A bidi code unit directly followed by a non-bidi, non-ASCII one.
    assert!(is_utf16_bidi(&wrap(&[0x0590, 0x3041])));
}
2782
/// Exercises `check_str_for_latin1_and_bidi` on ASCII text that has a single
/// probe character spliced into the middle, checking only whether the result
/// is `Latin1Bidi::Bidi` or not.
#[test]
fn test_check_str_for_latin1_and_bidi() {
    // Inputs whose probe character must not produce `Latin1Bidi::Bidi`.
    let not_bidi = [
        "abcdefghijklmnopaabcdefghijklmnop",
        "abcdefghijklmnop\u{03B1}abcdefghijklmnop",
        "abcdefghijklmnop\u{3041}abcdefghijklmnop",
        "abcdefghijklmnop\u{1F4A9}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE00}abcdefghijklmnop",
        "abcdefghijklmnop\u{202C}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFF}abcdefghijklmnop",
    ];
    for text in not_bidi.iter() {
        assert_ne!(
            check_str_for_latin1_and_bidi(text),
            Latin1Bidi::Bidi,
            "unexpected Bidi for {:?}",
            text
        );
    }

    // Inputs whose probe character must produce `Latin1Bidi::Bidi`. The
    // probes sit on the boundaries of the ranges listed in
    // `reference_is_char_bidi`, plus the individual control characters.
    let bidi = [
        "abcdefghijklmnop\u{0590}abcdefghijklmnop",
        "abcdefghijklmnop\u{08FF}abcdefghijklmnop",
        "abcdefghijklmnop\u{061C}abcdefghijklmnop",
        // Lower bound of the U+FB1D..=U+FDFF range; mirrors the probe used
        // by the UTF-16 variant of this test.
        "abcdefghijklmnop\u{FB1D}abcdefghijklmnop",
        "abcdefghijklmnop\u{FB50}abcdefghijklmnop",
        "abcdefghijklmnop\u{FDFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE70}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFE}abcdefghijklmnop",
        "abcdefghijklmnop\u{200F}abcdefghijklmnop",
        "abcdefghijklmnop\u{202B}abcdefghijklmnop",
        "abcdefghijklmnop\u{202E}abcdefghijklmnop",
        "abcdefghijklmnop\u{2067}abcdefghijklmnop",
        "abcdefghijklmnop\u{10800}abcdefghijklmnop",
        "abcdefghijklmnop\u{10FFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{1E800}abcdefghijklmnop",
        "abcdefghijklmnop\u{1EFFF}abcdefghijklmnop",
    ];
    for text in bidi.iter() {
        assert_eq!(
            check_str_for_latin1_and_bidi(text),
            Latin1Bidi::Bidi,
            "expected Bidi for {:?}",
            text
        );
    }
}
2874
/// Exercises `check_utf8_for_latin1_and_bidi` on the UTF-8 bytes of ASCII
/// text that has a single probe character spliced into the middle, checking
/// only whether the result is `Latin1Bidi::Bidi` or not.
#[test]
fn test_check_utf8_for_latin1_and_bidi() {
    // Inputs whose probe character must not produce `Latin1Bidi::Bidi`.
    let not_bidi = [
        "abcdefghijklmnopaabcdefghijklmnop",
        "abcdefghijklmnop\u{03B1}abcdefghijklmnop",
        "abcdefghijklmnop\u{3041}abcdefghijklmnop",
        "abcdefghijklmnop\u{1F4A9}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE00}abcdefghijklmnop",
        "abcdefghijklmnop\u{202C}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFF}abcdefghijklmnop",
    ];
    for text in not_bidi.iter() {
        assert_ne!(
            check_utf8_for_latin1_and_bidi(text.as_bytes()),
            Latin1Bidi::Bidi,
            "unexpected Bidi for {:?}",
            text
        );
    }

    // Inputs whose probe character must produce `Latin1Bidi::Bidi`. The
    // probes sit on the boundaries of the ranges listed in
    // `reference_is_char_bidi`, plus the individual control characters.
    let bidi = [
        "abcdefghijklmnop\u{0590}abcdefghijklmnop",
        "abcdefghijklmnop\u{08FF}abcdefghijklmnop",
        "abcdefghijklmnop\u{061C}abcdefghijklmnop",
        // Lower bound of the U+FB1D..=U+FDFF range; mirrors the probe used
        // by the UTF-16 variant of this test.
        "abcdefghijklmnop\u{FB1D}abcdefghijklmnop",
        "abcdefghijklmnop\u{FB50}abcdefghijklmnop",
        "abcdefghijklmnop\u{FDFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE70}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFE}abcdefghijklmnop",
        "abcdefghijklmnop\u{200F}abcdefghijklmnop",
        "abcdefghijklmnop\u{202B}abcdefghijklmnop",
        "abcdefghijklmnop\u{202E}abcdefghijklmnop",
        "abcdefghijklmnop\u{2067}abcdefghijklmnop",
        "abcdefghijklmnop\u{10800}abcdefghijklmnop",
        "abcdefghijklmnop\u{10FFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{1E800}abcdefghijklmnop",
        "abcdefghijklmnop\u{1EFFF}abcdefghijklmnop",
    ];
    for text in bidi.iter() {
        assert_eq!(
            check_utf8_for_latin1_and_bidi(text.as_bytes()),
            Latin1Bidi::Bidi,
            "expected Bidi for {:?}",
            text
        );
    }
}
2966
/// Exercises `check_utf16_for_latin1_and_bidi` on a run of ASCII code units
/// with a single probe code unit in the middle, checking only whether the
/// result is `Latin1Bidi::Bidi` or not.
#[test]
fn test_check_utf16_for_latin1_and_bidi() {
    // "bcdefghi" as UTF-16; placed on both sides of the probe.
    const PAD: [u16; 8] = [0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69];

    // 8 padding units + 1 probe slot (index 8) + 8 padding units.
    let mut buf = [0u16; 17];
    buf[..8].copy_from_slice(&PAD);
    buf[9..].copy_from_slice(&PAD);

    // Probe code units that must not produce `Latin1Bidi::Bidi`.
    let not_bidi: [u16; 7] = [0x0062, 0x03B1, 0x3041, 0xD801, 0xFE00, 0x202C, 0xFEFF];
    for &unit in not_bidi.iter() {
        buf[8] = unit;
        assert_ne!(
            check_utf16_for_latin1_and_bidi(&buf[..]),
            Latin1Bidi::Bidi,
            "unexpected Bidi for code unit {:#06X}",
            unit
        );
    }

    // Probe code units that must produce `Latin1Bidi::Bidi`; these are the
    // range boundaries and individual values that
    // `reference_is_utf16_code_unit_bidi` treats as bidi.
    let bidi: [u16; 16] = [
        0x0590, 0x08FF, 0x061C, 0xFB1D, 0xFB50, 0xFDFF, 0xFE70, 0xFEFE, 0x200F, 0x202B, 0x202E,
        0x2067, 0xD802, 0xD803, 0xD83A, 0xD83B,
    ];
    for &unit in bidi.iter() {
        buf[8] = unit;
        assert_eq!(
            check_utf16_for_latin1_and_bidi(&buf[..]),
            Latin1Bidi::Bidi,
            "expected Bidi for code unit {:#06X}",
            unit
        );
    }

    // A bidi code unit immediately followed by a non-Latin1, non-bidi one
    // must still be reported as bidi.
    assert_eq!(
        check_utf16_for_latin1_and_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63, 0x64,
            0x65, 0x66, 0x67, 0x68, 0x69,
        ]),
        Latin1Bidi::Bidi
    );
}
3139
/// Straightforward reference predicate used by the exhaustive tests: returns
/// `true` when `c` falls in one of the listed right-to-left ranges or is one
/// of the four listed individual control characters, and `false` otherwise.
#[inline(always)]
pub fn reference_is_char_bidi(c: char) -> bool {
    match c {
        '\u{0590}'..='\u{08FF}'
        | '\u{FB1D}'..='\u{FDFF}'
        | '\u{FE70}'..='\u{FEFE}'
        | '\u{10800}'..='\u{10FFF}'
        | '\u{1E800}'..='\u{1EFFF}'
        | '\u{200F}'
        | '\u{202B}'
        | '\u{202E}'
        | '\u{2067}' => true,
        _ => false,
    }
}
3155
/// Straightforward reference predicate used by the exhaustive tests: returns
/// `true` when the UTF-16 code unit `u` falls in one of the listed BMP
/// ranges, is one of the four listed surrogate values, or is one of the four
/// listed individual control characters; `false` otherwise.
#[inline(always)]
pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool {
    match u {
        0x0590..=0x08FF
        | 0xFB1D..=0xFDFF
        | 0xFE70..=0xFEFE
        | 0xD802
        | 0xD803
        | 0xD83A
        | 0xD83B
        | 0x200F
        | 0x202B
        | 0x202E
        | 0x2067 => true,
        _ => false,
    }
}
3173
/// Compares `is_char_bidi` with `reference_is_char_bidi` for every Unicode
/// scalar value.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_is_char_bidi_thoroughly() {
    // 0xD800..0xE000 is skipped: those values are surrogates, for which
    // `char::from_u32` returns `None`.
    for i in (0..0xD800u32).chain(0xE000..0x110000u32) {
        let c: char = ::core::char::from_u32(i).unwrap();
        assert_eq!(is_char_bidi(c), reference_is_char_bidi(c));
    }
}
3186
/// Compares `is_utf16_code_unit_bidi` with
/// `reference_is_utf16_code_unit_bidi` for every possible `u16` value.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_is_utf16_code_unit_bidi_thoroughly() {
    // Inclusive range so that 0xFFFF itself is covered.
    for u in 0..=0xFFFFu16 {
        assert_eq!(
            is_utf16_code_unit_bidi(u),
            reference_is_utf16_code_unit_bidi(u)
        );
    }
}
3198
/// Compares `is_str_bidi` on the UTF-8 encoding of each individual Unicode
/// scalar value with `reference_is_char_bidi`.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_is_str_bidi_thoroughly() {
    // Four bytes is the longest UTF-8 encoding of a single `char`.
    let mut buf = [0u8; 4];
    // 0xD800..0xE000 is skipped: those values are surrogates, for which
    // `char::from_u32` returns `None`.
    for i in (0..0xD800u32).chain(0xE000..0x110000u32) {
        let c: char = ::core::char::from_u32(i).unwrap();
        assert_eq!(
            is_str_bidi(c.encode_utf8(&mut buf[..])),
            reference_is_char_bidi(c)
        );
    }
}
3218
/// Compares `is_utf8_bidi` with `reference_is_char_bidi` for every Unicode
/// scalar value, once on the bare UTF-8 encoding and once on the encoding
/// followed by zero bytes filling an 8-byte buffer.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_is_utf8_bidi_thoroughly() {
    let mut buf = [0u8; 8];
    // 0xD800..0xE000 is skipped: those values are surrogates, for which
    // `char::from_u32` returns `None`.
    for i in (0..0xD800u32).chain(0xE000..0x110000u32) {
        let c: char = ::core::char::from_u32(i).unwrap();
        let expect = reference_is_char_bidi(c);

        // Check the exact-length encoding; the inner scope ends the borrow
        // of `buf` before the tail is rewritten.
        let len = {
            let bytes = c.encode_utf8(&mut buf[..]).as_bytes();
            assert_eq!(is_utf8_bidi(bytes), expect);
            bytes.len()
        };

        // Clear whatever a previous, longer encoding left after this one so
        // the full buffer is the character followed only by U+0000 bytes.
        for b in buf[len..].iter_mut() {
            *b = 0;
        }
        assert_eq!(is_utf8_bidi(&buf[..]), expect);
    }
}
3260
/// Compares `is_utf16_bidi` with `reference_is_utf16_code_unit_bidi` for
/// every possible `u16` value, placed at index 15 of an otherwise all-zero
/// 32-unit buffer.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_is_utf16_bidi_thoroughly() {
    let mut buf = [0u16; 32];
    // Inclusive range so that 0xFFFF itself is covered.
    for u in 0..=0xFFFFu16 {
        buf[15] = u;
        assert_eq!(
            is_utf16_bidi(&buf[..]),
            reference_is_utf16_code_unit_bidi(u)
        );
    }
}
3274
/// Checks `is_utf8_bidi` on short inputs built from U+057F (`D5 BF`) and
/// U+0580 (`D6 80`), two code points just below the U+0590 start of the
/// first bidi range, and from plain ASCII.
#[test]
fn test_is_utf8_bidi_edge_cases() {
    // Followed by ASCII (or ASCII only): expected to be reported as not bidi.
    assert!(!is_utf8_bidi(b"\xD5\xBF\x61"));
    assert!(!is_utf8_bidi(b"\xD6\x80\x61"));
    assert!(!is_utf8_bidi(b"abc"));
    // Same prefixes ending in a lone 0xC2 lead byte with no continuation
    // byte: expected to be reported as bidi.
    assert!(is_utf8_bidi(b"\xD5\xBF\xC2"));
    assert!(is_utf8_bidi(b"\xD6\x80\xC2"));
    assert!(is_utf8_bidi(b"ab\xC2"));
}
3284
/// Checks that `decode_latin1` borrows for ASCII-only input and yields the
/// expected text for input containing byte 0xE4.
#[test]
fn test_decode_latin1() {
    // ASCII-only input must come back as a borrow.
    match decode_latin1(b"ab") {
        Cow::Borrowed(s) => assert_eq!(s, "ab"),
        Cow::Owned(_) => unreachable!("Should have borrowed"),
    }
    // Byte 0xE4 maps to U+00E4.
    assert_eq!(decode_latin1(b"a\xE4"), "a\u{E4}");
}
3297
/// Checks that `encode_latin1_lossy` borrows for ASCII-only input and yields
/// the expected bytes for input containing U+00E4.
#[test]
fn test_encode_latin1_lossy() {
    // ASCII-only input must come back as a borrow.
    match encode_latin1_lossy("ab") {
        Cow::Borrowed(s) => assert_eq!(s, b"ab"),
        Cow::Owned(_) => unreachable!("Should have borrowed"),
    }
    // U+00E4 maps to the single byte 0xE4.
    assert_eq!(encode_latin1_lossy("a\u{E4}"), &(b"a\xE4")[..]);
}
3310
/// Checks `convert_utf8_to_utf16_without_replacement` on one-, two-, three-
/// and four-byte UTF-8 sequences, and on a truncated four-byte sequence.
///
/// The output buffer is deliberately reused without being cleared, so slots
/// that a given conversion does not write still hold values left by earlier
/// cases; several assertions below read such slots.
#[test]
fn test_convert_utf8_to_utf16_without_replacement() {
    let mut buf = [0u16; 5];

    // Two ASCII bytes -> two code units.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"ab", &mut buf[..2]),
        Some(2)
    );
    assert_eq!(buf[0], u16::from(b'a'));
    assert_eq!(buf[1], u16::from(b'b'));
    assert_eq!(buf[2], 0);

    // Two-byte sequence (U+00E4) followed by ASCII.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xC3\xA4c", &mut buf[..3]),
        Some(2)
    );
    assert_eq!(buf[0], 0xE4);
    assert_eq!(buf[1], u16::from(b'c'));
    assert_eq!(buf[2], 0);

    // Three-byte sequence (U+2603) alone; buf[1] still holds 'c' from the
    // previous case.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83", &mut buf[..3]),
        Some(1)
    );
    assert_eq!(buf[0], 0x2603);
    assert_eq!(buf[1], u16::from(b'c'));
    assert_eq!(buf[2], 0);

    // Three-byte sequence followed by ASCII.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83d", &mut buf[..4]),
        Some(2)
    );
    assert_eq!(buf[0], 0x2603);
    assert_eq!(buf[1], u16::from(b'd'));
    assert_eq!(buf[2], 0);

    // Three-byte sequence followed by a two-byte sequence.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83\xC3\xA4", &mut buf[..5]),
        Some(2)
    );
    assert_eq!(buf[0], 0x2603);
    assert_eq!(buf[1], 0xE4);
    assert_eq!(buf[2], 0);

    // Four-byte sequence -> surrogate pair.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93\x8E", &mut buf[..4]),
        Some(2)
    );
    assert_eq!(buf[0], 0xD83D);
    assert_eq!(buf[1], 0xDCCE);
    assert_eq!(buf[2], 0);

    // Four-byte sequence followed by ASCII.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93\x8Ee", &mut buf[..5]),
        Some(3)
    );
    assert_eq!(buf[0], 0xD83D);
    assert_eq!(buf[1], 0xDCCE);
    assert_eq!(buf[2], u16::from(b'e'));

    // Four-byte sequence missing its last byte -> `None`.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93", &mut buf[..5]),
        None
    );
}
3368	}
3369