lib.rs source code [crates/unicode_bidi/src/lib.rs]

1	// Copyright 2015 The Servo Project Developers. See the
2	// COPYRIGHT file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	//! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed
11	//! right-to-left and left-to-right text. It is written in safe Rust, compatible with the
12	//! current stable release.
13	//!
14	//! ## Example
15	//!
16	//! ```rust
17	//! # #[cfg(feature = "hardcoded-data")] {
18	//! use unicode_bidi::BidiInfo;
19	//!
20	//! // This example text is defined using `concat!` because some browsers
21	//! // and text editors have trouble displaying bidi strings.
22	//! let text = concat![
23	//! "א",
24	//! "ב",
25	//! "ג",
26	//! "a",
27	//! "b",
28	//! "c",
29	//! ];
30	//!
31	//! // Resolve embedding levels within the text. Pass `None` to detect the
32	//! // paragraph level automatically.
33	//! let bidi_info = BidiInfo::new(&text, None);
34	//!
35	//! // This paragraph has embedding level 1 because its first strong character is RTL.
36	//! assert_eq!(bidi_info.paragraphs.len(), 1);
37	//! let para = &bidi_info.paragraphs[0];
38	//! assert_eq!(para.level.number(), 1);
39	//! assert_eq!(para.level.is_rtl(), true);
40	//!
41	//! // Re-ordering is done after wrapping each paragraph into a sequence of
42	//! // lines. For this example, I'll just use a single line that spans the
43	//! // entire paragraph.
44	//! let line = para.range.clone();
45	//!
46	//! let display = bidi_info.reorder_line(para, line);
47	//! assert_eq!(display, concat![
48	//! "a",
49	//! "b",
50	//! "c",
51	//! "ג",
52	//! "ב",
53	//! "א",
54	//! ]);
55	//! # } // feature = "hardcoded-data"
56	//! ```
57	//!
58	//! # Features
59	//!
60	//! - `std`: Enabled by default, but can be disabled to make `unicode_bidi`
61	//! `#![no_std]` + `alloc` compatible.
62	//! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs.
63	//! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`]
64	//! implementations to relevant types.
65	//!
66	//! [tr9]: <http://www.unicode.org/reports/tr9/>
67
68	#![no_std]
69	// We need to link to std to make doc tests work on older Rust versions
70	#[cfg(feature = "std")]
71	extern crate std;
72	#[macro_use]
73	extern crate alloc;
74	#[cfg(feature = "smallvec")]
75	extern crate smallvec;
76
77	pub mod data_source;
78	pub mod deprecated;
79	pub mod format_chars;
80	pub mod level;
81	pub mod utf16;
82
83	mod char_data;
84	mod explicit;
85	mod implicit;
86	mod prepare;
87
88	pub use crate::char_data::{BidiClass, UNICODE_VERSION};
89	pub use crate::data_source::BidiDataSource;
90	pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
91	pub use crate::prepare::{LevelRun, LevelRunVec};
92
93	#[cfg(feature = "hardcoded-data")]
94	pub use crate::char_data::{bidi_class, HardcodedBidiData};
95
96	use alloc::borrow::Cow;
97	use alloc::string::String;
98	use alloc::vec::Vec;
99	use core::char;
100	use core::cmp;
101	use core::iter::repeat;
102	use core::ops::Range;
103	use core::str::CharIndices;
104	#[cfg(feature = "smallvec")]
105	use smallvec::SmallVec;
106
107	use crate::format_chars as chars;
108	use crate::BidiClass::*;
109
110	/// Trait that abstracts over a text source for use by the bidi algorithms.
111	/// We implement this for str (UTF-8) and for [u16] (UTF-16, native-endian).
112	/// (For internal unicode-bidi use; API may be unstable.)
113	/// This trait is sealed and cannot be implemented for types outside this crate.
114	pub trait TextSource<'text>: private::Sealed {
115	type CharIter: Iterator<Item = char>;
116	type CharIndexIter: Iterator<Item = (usize, char)>;
117	type IndexLenIter: Iterator<Item = (usize, usize)>;
118
119	/// Return the length of the text in code units.
120	#[doc(hidden)]
121	fn len(&self) -> usize;
122
123	/// Get the character at a given code unit index, along with its length in code units.
124	/// Returns None if index is out of range, or points inside a multi-code-unit character.
125	/// Returns REPLACEMENT_CHARACTER for any unpaired surrogates in UTF-16.
126	#[doc(hidden)]
127	fn char_at(&self, index: usize) -> Option<(char, usize)>;
128
129	/// Return a subrange of the text, indexed by code units.
130	/// (We don't implement all of the Index trait, just the minimum we use.)
131	#[doc(hidden)]
132	fn subrange(&self, range: Range<usize>) -> &Self;
133
134	/// An iterator over the text returning Unicode characters,
135	/// REPLACEMENT_CHAR for invalid code units.
136	#[doc(hidden)]
137	fn chars(&'text self) -> Self::CharIter;
138
139	/// An iterator over the text returning (index, char) tuples,
140	/// where index is the starting code-unit index of the character,
141	/// and char is its Unicode value (or REPLACEMENT_CHAR if invalid).
142	#[doc(hidden)]
143	fn char_indices(&'text self) -> Self::CharIndexIter;
144
145	/// An iterator over the text returning (index, length) tuples,
146	/// where index is the starting code-unit index of the character,
147	/// and length is its length in code units.
148	#[doc(hidden)]
149	fn indices_lengths(&'text self) -> Self::IndexLenIter;
150
151	/// Number of code units the given character uses.
152	#[doc(hidden)]
153	fn char_len(ch: char) -> usize;
154	}
155
/// Private module holding the marker trait used to seal `TextSource`.
mod private {
    /// Marker supertrait of `TextSource`. Because this module is private,
    /// code outside the crate cannot name the trait and so cannot implement it.
    pub trait Sealed {}

    // Implement for str and [u16] only.
    impl Sealed for str {}
    impl Sealed for [u16] {}
}
163
/// Direction of a paragraph: `Ltr`, `Rtl`, or `Mixed`.
///
/// Returned by `ParagraphBidiInfo::direction()`, which derives it from the
/// paragraph's resolved levels.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Direction {
    Ltr,
    Rtl,
    Mixed,
}
170
171	/// Bidi information about a single paragraph
172	#[derive(Clone, Debug, PartialEq)]
173	pub struct ParagraphInfo {
174	/// The paragraphs boundaries within the text, as byte indices.
175	///
176	/// TODO: Shrink this to only include the starting index?
177	pub range: Range<usize>,
178
179	/// The paragraph embedding level.
180	///
181	/// <http://www.unicode.org/reports/tr9/#BD4>
182	pub level: Level,
183	}
184
185	impl ParagraphInfo {
186	/// Gets the length of the paragraph in the source text.
187	pub fn len(&self) -> usize {
188	self.range.end - self.range.start
189	}
190	}
191
192	/// Initial bidi information of the text.
193	///
194	/// Contains the text paragraphs and `BidiClass` of its characters.
195	#[derive(PartialEq, Debug)]
196	pub struct InitialInfo<'text> {
197	/// The text
198	pub text: &'text str,
199
200	/// The BidiClass of the character at each byte in the text.
201	/// If a character is multiple bytes, its class will appear multiple times in the vector.
202	pub original_classes: Vec<BidiClass>,
203
204	/// The boundaries and level of each paragraph within the text.
205	pub paragraphs: Vec<ParagraphInfo>,
206	}
207
208	impl<'text> InitialInfo<'text> {
209	/// Find the paragraphs and BidiClasses in a string of text.
210	///
211	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
212	///
213	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
214	/// character is found before the matching PDI. If no strong character is found, the class will
215	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
216	///
217	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
218	#[cfg_attr(feature = "flame_it", flamer::flame)]
219	#[cfg(feature = "hardcoded-data")]
220	pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> {
221	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
222	}
223
224	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
225	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
226	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
227	///
228	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
229	///
230	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
231	/// character is found before the matching PDI. If no strong character is found, the class will
232	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
233	#[cfg_attr(feature = "flame_it", flamer::flame)]
234	pub fn new_with_data_source<'a, D: BidiDataSource>(
235	data_source: &D,
236	text: &'a str,
237	default_para_level: Option<Level>,
238	) -> InitialInfo<'a> {
239	InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
240	}
241	}
242
243	/// Extended version of InitialInfo (not public API).
244	#[derive(PartialEq, Debug)]
245	struct InitialInfoExt<'text> {
246	/// The base InitialInfo for the text, recording its paragraphs and bidi classes.
247	base: InitialInfo<'text>,
248
249	/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
250	/// requires no further bidi processing (i.e. there are no RTL characters or bidi
251	/// control codes present), and whether any bidi isolation controls are present.
252	flags: Vec<ParagraphInfoFlags>,
253	}
254
/// Per-paragraph flags gathered while scanning the text for initial bidi info.
#[derive(PartialEq, Debug)]
struct ParagraphInfoFlags {
    /// `true` unless an RTL character or a bidi control character was
    /// encountered in the paragraph.
    is_pure_ltr: bool,
    /// `true` if any bidi isolation controls are present in the paragraph.
    has_isolate_controls: bool,
}
260
261	impl<'text> InitialInfoExt<'text> {
262	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
263	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
264	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
265	///
266	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
267	///
268	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
269	/// character is found before the matching PDI. If no strong character is found, the class will
270	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
271	#[cfg_attr(feature = "flame_it", flamer::flame)]
272	pub fn new_with_data_source<'a, D: BidiDataSource>(
273	data_source: &D,
274	text: &'a str,
275	default_para_level: Option<Level>,
276	) -> InitialInfoExt<'a> {
277	let mut paragraphs = Vec::<ParagraphInfo>::new();
278	let mut flags = Vec::<ParagraphInfoFlags>::new();
279	let (original_classes, _, _, _) = compute_initial_info(
280	data_source,
281	text,
282	default_para_level,
283	Some((&mut paragraphs, &mut flags)),
284	);
285
286	InitialInfoExt {
287	base: InitialInfo {
288	text,
289	original_classes,
290	paragraphs,
291	},
292	flags,
293	}
294	}
295	}
296
297	/// Implementation of initial-info computation for both BidiInfo and ParagraphBidiInfo.
298	/// To treat the text as (potentially) multiple paragraphs, the caller should pass the
299	/// pair of optional outparam arrays to receive the ParagraphInfo and pure-ltr flags
300	/// for each paragraph. Passing None for split_paragraphs will ignore any paragraph-
301	/// separator characters in the text, treating it just as a single paragraph.
302	/// Returns the array of BidiClass values for each code unit of the text, along with
303	/// the embedding level and pure-ltr flag for the last* (or only) paragraph.*
304	fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
305	data_source: &D,
306	text: &'a T,
307	default_para_level: Option<Level>,
308	mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
309	) -> (Vec<BidiClass>, Level, bool, bool) {
310	let mut original_classes = Vec::with_capacity(text.len());
311
312	// The stack contains the starting code unit index for each nested isolate we're inside.
313	#[cfg(feature = "smallvec")]
314	let mut isolate_stack = SmallVec::<[usize; `8`]>::new();
315	#[cfg(not(feature = "smallvec"))]
316	let mut isolate_stack = Vec::new();
317
318	debug_assert!(
319	if let Some((ref paragraphs, ref flags)) = split_paragraphs {
320	paragraphs.is_empty() && flags.is_empty()
321	} else {
322	`true`
323	}
324	);
325
326	let mut para_start = `0`;
327	let mut para_level = default_para_level;
328
329	// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
330	// RTL characters or bidi control characters are encountered in the paragraph.
331	let mut is_pure_ltr = `true`;
332	// Set to true if any bidi isolation controls are present in the paragraph.
333	let mut has_isolate_controls = `false`;
334
335	#[cfg(feature = "flame_it")]
336	flame::start("compute_initial_info(): iter text.char_indices()");
337
338	for (i, c) in text.char_indices() {
339	let class = data_source.bidi_class(c);
340
341	#[cfg(feature = "flame_it")]
342	flame::start("original_classes.extend()");
343
344	let len = T::char_len(c);
345	original_classes.extend(repeat(class).take(len));
346
347	#[cfg(feature = "flame_it")]
348	flame::end("original_classes.extend()");
349
350	match class {
351	B => {
352	if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
353	// P1. Split the text into separate paragraphs. The paragraph separator is kept
354	// with the previous paragraph.
355	let para_end = i + len;
356	paragraphs.push(ParagraphInfo {
357	range: para_start..para_end,
358	// P3. If no character is found in p2, set the paragraph level to zero.
359	level: para_level.unwrap_or(LTR_LEVEL),
360	});
361	flags.push(ParagraphInfoFlags {
362	is_pure_ltr,
363	has_isolate_controls,
364	});
365	// Reset state for the start of the next paragraph.
366	para_start = para_end;
367	// TODO: Support defaulting to direction of previous paragraph
368	//
369	// <http://www.unicode.org/reports/tr9/#HL1>
370	para_level = default_para_level;
371	is_pure_ltr = `true`;
372	has_isolate_controls = `false`;
373	isolate_stack.clear();
374	}
375	}
376
377	L \| R \| AL => {
378	if class != L {
379	is_pure_ltr = `false`;
380	}
381	match isolate_stack.last() {
382	Some(&start) => {
383	if original_classes[start] == FSI {
384	// X5c. If the first strong character between FSI and its matching
385	// PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
386	for j in `0`..T::char_len(chars::FSI) {
387	original_classes[start + j] = if class == L { LRI } else { RLI };
388	}
389	}
390	}
391
392	None => {
393	if para_level.is_none() {
394	// P2. Find the first character of type L, AL, or R, while skipping
395	// any characters between an isolate initiator and its matching
396	// PDI.
397	para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
398	}
399	}
400	}
401	}
402
403	AN \| LRE \| RLE \| LRO \| RLO => {
404	is_pure_ltr = `false`;
405	}
406
407	RLI \| LRI \| FSI => {
408	is_pure_ltr = `false`;
409	has_isolate_controls = `true`;
410	isolate_stack.push(i);
411	}
412
413	PDI => {
414	isolate_stack.pop();
415	}
416
417	_ => {}
418	}
419	}
420
421	if let Some((paragraphs, flags)) = split_paragraphs {
422	if para_start < text.len() {
423	paragraphs.push(ParagraphInfo {
424	range: para_start..text.len(),
425	level: para_level.unwrap_or(LTR_LEVEL),
426	});
427	flags.push(ParagraphInfoFlags {
428	is_pure_ltr,
429	has_isolate_controls,
430	});
431	}
432	debug_assert_eq!(paragraphs.len(), flags.len());
433	}
434	debug_assert_eq!(original_classes.len(), text.len());
435
436	#[cfg(feature = "flame_it")]
437	flame::end("compute_initial_info(): iter text.char_indices()");
438
439	(
440	original_classes,
441	para_level.unwrap_or(LTR_LEVEL),
442	is_pure_ltr,
443	has_isolate_controls,
444	)
445	}
446
447	/// Bidi information of the text.
448	///
449	/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
450	/// character is multiple bytes wide, then its class and level will appear multiple times in these
451	/// vectors.
452	// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
453	#[derive(Debug, PartialEq)]
454	pub struct BidiInfo<'text> {
455	/// The text
456	pub text: &'text str,
457
458	/// The BidiClass of the character at each byte in the text.
459	pub original_classes: Vec<BidiClass>,
460
461	/// The directional embedding level of each byte in the text.
462	pub levels: Vec<Level>,
463
464	/// The boundaries and paragraph embedding level of each paragraph within the text.
465	///
466	/// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
467	/// Or just don't include the first paragraph, which always starts at 0?
468	pub paragraphs: Vec<ParagraphInfo>,
469	}
470
471	impl<'text> BidiInfo<'text> {
472	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
473	///
474	///
475	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
476	///
477	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
478	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
479	///
480	/// TODO: Support auto-RTL base direction
481	#[cfg_attr(feature = "flame_it", flamer::flame)]
482	#[cfg(feature = "hardcoded-data")]
483	#[inline]
484	pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> {
485	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
486	}
487
488	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
489	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
490	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
491	///
492	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
493	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
494	///
495	/// TODO: Support auto-RTL base direction
496	#[cfg_attr(feature = "flame_it", flamer::flame)]
497	pub fn new_with_data_source<'a, D: BidiDataSource>(
498	data_source: &D,
499	text: &'a str,
500	default_para_level: Option<Level>,
501	) -> BidiInfo<'a> {
502	let InitialInfoExt { base, flags, .. } =
503	InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
504
505	let mut levels = Vec::<Level>::with_capacity(text.len());
506	let mut processing_classes = base.original_classes.clone();
507
508	for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
509	let text = &text[para.range.clone()];
510	let original_classes = &base.original_classes[para.range.clone()];
511
512	compute_bidi_info_for_para(
513	data_source,
514	para,
515	flags.is_pure_ltr,
516	flags.has_isolate_controls,
517	text,
518	original_classes,
519	&mut processing_classes,
520	&mut levels,
521	);
522	}
523
524	BidiInfo {
525	text,
526	original_classes: base.original_classes,
527	paragraphs: base.paragraphs,
528	levels,
529	}
530	}
531
532	/// Produce the levels for this paragraph as needed for reordering, one level per byte
533	/// in the paragraph. The returned vector includes bytes that are not included
534	/// in the `line`, but will not adjust them.
535	///
536	/// This runs [Rule L1], you can run
537	/// [Rule L2] by calling [`Self::reorder_visual()`].
538	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
539	/// to avoid non-byte indices.
540	///
541	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
542	///
543	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
544	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
545	#[cfg_attr(feature = "flame_it", flamer::flame)]
546	pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
547	assert!(line.start <= self.levels.len());
548	assert!(line.end <= self.levels.len());
549
550	let mut levels = self.levels.clone();
551	let line_classes = &self.original_classes[line.clone()];
552	let line_levels = &mut levels[line.clone()];
553
554	reorder_levels(
555	line_classes,
556	line_levels,
557	self.text.subrange(line),
558	para.level,
559	);
560
561	levels
562	}
563
564	/// Produce the levels for this paragraph as needed for reordering, one level per character
565	/// in the paragraph. The returned vector includes characters that are not included
566	/// in the `line`, but will not adjust them.
567	///
568	/// This runs [Rule L1], you can run
569	/// [Rule L2] by calling [`Self::reorder_visual()`].
570	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
571	/// to avoid non-byte indices.
572	///
573	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
574	///
575	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
576	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
577	#[cfg_attr(feature = "flame_it", flamer::flame)]
578	pub fn reordered_levels_per_char(
579	&self,
580	para: &ParagraphInfo,
581	line: Range<usize>,
582	) -> Vec<Level> {
583	let levels = self.reordered_levels(para, line);
584	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
585	}
586
587	/// Re-order a line based on resolved levels and return the line in display order.
588	///
589	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
590	///
591	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
592	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
593	#[cfg_attr(feature = "flame_it", flamer::flame)]
594	pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
595	if !level::has_rtl(&self.levels[line.clone()]) {
596	return self.text[line].into();
597	}
598	let (levels, runs) = self.visual_runs(para, line.clone());
599	reorder_line(self.text, line, levels, runs)
600	}
601
602	/// Reorders pre-calculated levels of a sequence of characters.
603	///
604	/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
605	/// intended to be used when an application has determined the levels of the objects (character sequences)
606	/// and just needs to have them reordered.
607	///
608	/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
609	///
610	/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
611	/// information about the actual text.
612	///
613	/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
614	/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
615	/// is for a single code point.
616	///
617	///
618	/// # # Example
619	/// ```
620	/// use unicode_bidi::BidiInfo;
621	/// use unicode_bidi::Level;
622	///
623	/// let l0 = Level::from(`0`);
624	/// let l1 = Level::from(`1`);
625	/// let l2 = Level::from(`2`);
626	///
627	/// let levels = vec![l0, l0, l0, l0];
628	/// let index_map = BidiInfo::reorder_visual(&levels);
629	/// assert_eq!(levels.len(), index_map.len());
630	/// assert_eq!(index_map, [`0`, `1`, `2`, `3`]);
631	///
632	/// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
633	/// let index_map = BidiInfo::reorder_visual(&levels);
634	/// assert_eq!(levels.len(), index_map.len());
635	/// assert_eq!(index_map, [`0`, `1`, `2`, `6`, `7`, `5`, `4`, `3`]);
636	/// ```
637	#[cfg_attr(feature = "flame_it", flamer::flame)]
638	#[inline]
639	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
640	reorder_visual(levels)
641	}
642
643	/// Find the level runs within a line and return them in visual order.
644	///
645	/// `line` is a range of bytes indices within `levels`.
646	///
647	/// The first return value is a vector of levels used by the reordering algorithm,
648	/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
649	/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
650	/// same level) should be displayed. Within each run, the display order can be checked
651	/// against the Level vector.
652	///
653	/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
654	/// as that should be handled by the engine using this API.
655	///
656	/// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
657	/// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
658	/// of producing a level map, since one may wish to deal with the fact that this is operating on
659	/// byte rather than character indices.
660	///
661	/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
662	///
663	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
664	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
665	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
666	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
667	#[cfg_attr(feature = "flame_it", flamer::flame)]
668	#[inline]
669	pub fn visual_runs(
670	&self,
671	para: &ParagraphInfo,
672	line: Range<usize>,
673	) -> (Vec<Level>, Vec<LevelRun>) {
674	let levels = self.reordered_levels(para, line.clone());
675	visual_runs_for_line(levels, &line)
676	}
677
678	/// If processed text has any computed RTL levels
679	///
680	/// This information is usually used to skip re-ordering of text when no RTL level is present
681	#[inline]
682	pub fn has_rtl(&self) -> bool {
683	level::has_rtl(&self.levels)
684	}
685	}
686
687	/// Bidi information of text treated as a single paragraph.
688	///
689	/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
690	/// character is multiple bytes wide, then its class and level will appear multiple times in these
691	/// vectors.
692	#[derive(Debug, PartialEq)]
693	pub struct ParagraphBidiInfo<'text> {
694	/// The text
695	pub text: &'text str,
696
697	/// The BidiClass of the character at each byte in the text.
698	pub original_classes: Vec<BidiClass>,
699
700	/// The directional embedding level of each byte in the text.
701	pub levels: Vec<Level>,
702
703	/// The paragraph embedding level.
704	pub paragraph_level: Level,
705
706	/// Whether the paragraph is purely LTR.
707	pub is_pure_ltr: bool,
708	}
709
710	impl<'text> ParagraphBidiInfo<'text> {
711	/// Determine the bidi embedding level.
712	///
713	///
714	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
715	///
716	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
717	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
718	///
719	/// TODO: Support auto-RTL base direction
720	#[cfg_attr(feature = "flame_it", flamer::flame)]
721	#[cfg(feature = "hardcoded-data")]
722	#[inline]
723	pub fn new(text: &str, default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
724	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
725	}
726
727	/// Determine the bidi embedding level, with a custom [`BidiDataSource`]
728	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
729	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
730	///
731	/// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
732	/// and should be kept in sync with it.
733	#[cfg_attr(feature = "flame_it", flamer::flame)]
734	pub fn new_with_data_source<'a, D: BidiDataSource>(
735	data_source: &D,
736	text: &'a str,
737	default_para_level: Option<Level>,
738	) -> ParagraphBidiInfo<'a> {
739	// Here we could create a ParagraphInitialInfo struct to parallel the one
740	// used by BidiInfo, but there doesn't seem any compelling reason for it.
741	let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
742	compute_initial_info(data_source, text, default_para_level, None);
743
744	let mut levels = Vec::<Level>::with_capacity(text.len());
745	let mut processing_classes = original_classes.clone();
746
747	let para_info = ParagraphInfo {
748	range: Range {
749	start: `0`,
750	end: text.len(),
751	},
752	level: paragraph_level,
753	};
754
755	compute_bidi_info_for_para(
756	data_source,
757	&para_info,
758	is_pure_ltr,
759	has_isolate_controls,
760	text,
761	&original_classes,
762	&mut processing_classes,
763	&mut levels,
764	);
765
766	ParagraphBidiInfo {
767	text,
768	original_classes,
769	levels,
770	paragraph_level,
771	is_pure_ltr,
772	}
773	}
774
775	/// Produce the levels for this paragraph as needed for reordering, one level per byte
776	/// in the paragraph. The returned vector includes bytes that are not included
777	/// in the `line`, but will not adjust them.
778	///
779	/// See BidiInfo::reordered_levels for details.
780	///
781	/// (This should be kept in sync with BidiInfo::reordered_levels.)
782	#[cfg_attr(feature = "flame_it", flamer::flame)]
783	pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
784	assert!(line.start <= self.levels.len());
785	assert!(line.end <= self.levels.len());
786
787	let mut levels = self.levels.clone();
788	let line_classes = &self.original_classes[line.clone()];
789	let line_levels = &mut levels[line.clone()];
790
791	reorder_levels(
792	line_classes,
793	line_levels,
794	self.text.subrange(line),
795	self.paragraph_level,
796	);
797
798	levels
799	}
800
801	/// Produce the levels for this paragraph as needed for reordering, one level per character
802	/// in the paragraph. The returned vector includes characters that are not included
803	/// in the `line`, but will not adjust them.
804	///
805	/// See BidiInfo::reordered_levels_per_char for details.
806	///
807	/// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
808	#[cfg_attr(feature = "flame_it", flamer::flame)]
809	pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
810	let levels = self.reordered_levels(line);
811	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
812	}
813
814	/// Re-order a line based on resolved levels and return the line in display order.
815	///
816	/// See BidiInfo::reorder_line for details.
817	///
818	/// (This should be kept in sync with BidiInfo::reorder_line.)
819	#[cfg_attr(feature = "flame_it", flamer::flame)]
820	pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, str> {
821	if !level::has_rtl(&self.levels[line.clone()]) {
822	return self.text[line].into();
823	}
824
825	let (levels, runs) = self.visual_runs(line.clone());
826
827	reorder_line(self.text, line, levels, runs)
828	}
829
830	/// Reorders pre-calculated levels of a sequence of characters.
831	///
832	/// See BidiInfo::reorder_visual for details.
833	#[cfg_attr(feature = "flame_it", flamer::flame)]
834	#[inline]
835	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
836	reorder_visual(levels)
837	}
838
839	/// Find the level runs within a line and return them in visual order.
840	///
841	/// `line` is a range of bytes indices within `levels`.
842	///
843	/// See BidiInfo::visual_runs for details.
844	///
845	/// (This should be kept in sync with BidiInfo::visual_runs.)
846	#[cfg_attr(feature = "flame_it", flamer::flame)]
847	#[inline]
848	pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
849	let levels = self.reordered_levels(line.clone());
850	visual_runs_for_line(levels, &line)
851	}
852
853	/// If processed text has any computed RTL levels
854	///
855	/// This information is usually used to skip re-ordering of text when no RTL level is present
856	#[inline]
857	pub fn has_rtl(&self) -> bool {
858	!self.is_pure_ltr
859	}
860
861	/// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
862	#[inline]
863	pub fn direction(&self) -> Direction {
864	para_direction(&self.levels)
865	}
866	}
867
868	/// Return a line of the text in display order based on resolved levels.
869	///
870	/// `text` the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
871	/// `line` a range of byte indices within `text` corresponding to one line
872	/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
873	/// `runs` array of `LevelRun`s in visual order
874	///
875	/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
876	/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
877	///
878	/// Returns: the reordered text of the line.
879	///
880	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
881	///
882	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
883	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
884	fn reorder_line(
885	text: &str,
886	line: Range<usize>,
887	levels: Vec<Level>,
888	runs: Vec<LevelRun>,
889	) -> Cow<'_, str> {
890	// If all isolating run sequences are LTR, no reordering is needed
891	if runs.iter().all(\|run: &Range\| levels[run.start].is_ltr()) {
892	return text[line].into();
893	}
894
895	let mut result: String = String::with_capacity(line.len());
896	for run: Range in runs {
897	if levels[run.start].is_rtl() {
898	result.extend(iter:text[run].chars().rev());
899	} else {
900	result.push_str(&text[run]);
901	}
902	}
903	result.into()
904	}
905
906	/// Find the level runs within a line and return them in visual order.
907	///
908	/// `line` is a range of code-unit indices within `levels`.
909	///
910	/// The first return value is a vector of levels used by the reordering algorithm,
911	/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
912	/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
913	/// same level) should be displayed. Within each run, the display order can be checked
914	/// against the Level vector.
915	///
916	/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
917	/// as that should be handled by the engine using this API.
918	///
919	/// Conceptually, this is the same as running [`reordered_levels()`] followed by
920	/// [`reorder_visual()`], however it returns the result as a list of level runs instead
921	/// of producing a level map, since one may wish to deal with the fact that this is operating on
922	/// byte rather than character indices.
923	///
924	/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
925	///
926	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
927	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
928	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
929	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
930	fn visual_runs_for_line(levels: Vec<Level>, line: &Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
931	// Find consecutive level runs.
932	let mut runs = Vec::new();
933	let mut start = line.start;
934	let mut run_level = levels[start];
935	let mut min_level = run_level;
936	let mut max_level = run_level;
937
938	for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + `1`) {
939	if new_level != run_level {
940	// End of the previous run, start of a new one.
941	runs.push(start..i);
942	start = i;
943	run_level = new_level;
944	min_level = cmp::min(run_level, min_level);
945	max_level = cmp::max(run_level, max_level);
946	}
947	}
948	runs.push(start..line.end);
949
950	let run_count = runs.len();
951
952	// Re-order the odd runs.
953	// <http://www.unicode.org/reports/tr9/#L2>
954
955	// Stop at the lowest odd* level.*
956	min_level = min_level.new_lowest_ge_rtl().expect("Level error");
957	// This loop goes through contiguous chunks of level runs that have a level
958	// ≥ max_level and reverses their contents, reducing max_level by 1 each time.
959	while max_level >= min_level {
960	// Look for the start of a sequence of consecutive runs of max_level or higher.
961	let mut seq_start = `0`;
962	while seq_start < run_count {
963	if levels[runs[seq_start].start] < max_level {
964	seq_start += `1`;
965	continue;
966	}
967
968	// Found the start of a sequence. Now find the end.
969	let mut seq_end = seq_start + `1`;
970	while seq_end < run_count {
971	if levels[runs[seq_end].start] < max_level {
972	break;
973	}
974	seq_end += `1`;
975	}
976	// Reverse the runs within this sequence.
977	runs[seq_start..seq_end].reverse();
978
979	seq_start = seq_end;
980	}
981	max_level
982	.lower(`1`)
983	.expect("Lowering embedding level below zero");
984	}
985	(levels, runs)
986	}
987
988	/// Reorders pre-calculated levels of a sequence of characters.
989	///
990	/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
991	/// intended to be used when an application has determined the levels of the objects (character sequences)
992	/// and just needs to have them reordered.
993	///
994	/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
995	///
996	/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
997	/// information about the actual text.
998	///
999	/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
1000	/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
1001	/// is for a single code point.
1002	fn reorder_visual(levels: &[Level]) -> Vec<usize> {
1003	// Gets the next range of characters after start_index with a level greater
1004	// than or equal to `max`
1005	fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> {
1006	if levels.is_empty() \|\| start_index >= levels.len() {
1007	return start_index..start_index;
1008	}
1009	while let Some(l) = levels.get(start_index) {
1010	if *l >= max {
1011	break;
1012	}
1013	start_index += `1`;
1014	}
1015
1016	if levels.get(start_index).is_none() {
1017	// If at the end of the array, adding one will
1018	// produce an out-of-range end element
1019	return start_index..start_index;
1020	}
1021
1022	let mut end_index = start_index + `1`;
1023	while let Some(l) = levels.get(end_index) {
1024	if *l < max {
1025	return start_index..end_index;
1026	}
1027	end_index += `1`;
1028	}
1029
1030	start_index..end_index
1031	}
1032
1033	// This implementation is similar to the L2 implementation in `visual_runs()`
1034	// but it cannot benefit from a precalculated LevelRun vector so needs to be different.
1035
1036	if levels.is_empty() {
1037	return vec![];
1038	}
1039
1040	// Get the min and max levels
1041	let (mut min, mut max) = levels
1042	.iter()
1043	.fold((levels[`0`], levels[`0`]), \|(min, max), &l\| {
1044	(cmp::min(min, l), cmp::max(max, l))
1045	});
1046
1047	// Initialize an index map
1048	let mut result: Vec<usize> = (`0`..levels.len()).collect();
1049
1050	if min == max && min.is_ltr() {
1051	// Everything is LTR and at the same level, do nothing
1052	return result;
1053	}
1054
1055	// Stop at the lowest odd* level, since everything below that*
1056	// is LTR and does not need further reordering
1057	min = min.new_lowest_ge_rtl().expect("Level error");
1058
1059	// For each max level, take all contiguous chunks of
1060	// levels ≥ max and reverse them
1061	//
1062	// We can do this check with the original levels instead of checking reorderings because all
1063	// prior reorderings will have been for contiguous chunks of levels >> max, which will
1064	// be a subset of these chunks anyway.
1065	while min <= max {
1066	let mut range = `0`..`0`;
1067	loop {
1068	range = next_range(levels, range.end, max);
1069	result[range.clone()].reverse();
1070
1071	if range.end >= levels.len() {
1072	break;
1073	}
1074	}
1075
1076	max.lower(`1`).expect("Level error");
1077	}
1078
1079	result
1080	}
1081
1082	/// The core of BidiInfo initialization, factored out into a function that both
1083	/// the utf-8 and utf-16 versions of BidiInfo can use.
1084	fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1085	data_source: &D,
1086	para: &ParagraphInfo,
1087	is_pure_ltr: bool,
1088	has_isolate_controls: bool,
1089	text: &'a T,
1090	original_classes: &[BidiClass],
1091	processing_classes: &mut [BidiClass],
1092	levels: &mut Vec<Level>,
1093	) {
1094	let new_len = levels.len() + para.range.len();
1095	levels.resize(new_len, para.level);
1096	if para.level == LTR_LEVEL && is_pure_ltr {
1097	return;
1098	}
1099
1100	let processing_classes = &mut processing_classes[para.range.clone()];
1101	let levels = &mut levels[para.range.clone()];
1102	let mut level_runs = LevelRunVec::new();
1103
1104	explicit::compute(
1105	text,
1106	para.level,
1107	original_classes,
1108	levels,
1109	processing_classes,
1110	&mut level_runs,
1111	);
1112
1113	let mut sequences = prepare::IsolatingRunSequenceVec::new();
1114	prepare::isolating_run_sequences(
1115	para.level,
1116	original_classes,
1117	levels,
1118	level_runs,
1119	has_isolate_controls,
1120	&mut sequences,
1121	);
1122	for sequence in &sequences {
1123	implicit::resolve_weak(text, sequence, processing_classes);
1124	implicit::resolve_neutral(
1125	text,
1126	data_source,
1127	sequence,
1128	levels,
1129	original_classes,
1130	processing_classes,
1131	);
1132	}
1133
1134	implicit::resolve_levels(processing_classes, levels);
1135
1136	assign_levels_to_removed_chars(para.level, original_classes, levels);
1137	}
1138
1139	/// Produce the levels for this paragraph as needed for reordering, one level per code unit
1140	/// in the paragraph. The returned vector includes code units that are not included
1141	/// in the `line`, but will not adjust them.
1142	///
1143	/// This runs [Rule L1]
1144	///
1145	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
1146	fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
1147	line_classes: &[BidiClass],
1148	line_levels: &mut [Level],
1149	line_text: &'a T,
1150	para_level: Level,
1151	) {
1152	// Reset some whitespace chars to paragraph level.
1153	// <http://www.unicode.org/reports/tr9/#L1>
1154	let mut reset_from: Option<usize> = Some(`0`);
1155	let mut reset_to: Option<usize> = None;
1156	let mut prev_level = para_level;
1157	for ((i, c), (_, length)) in line_text.char_indices().zip(line_text.indices_lengths()) {
1158	match line_classes[i] {
1159	// Segment separator, Paragraph separator
1160	B \| S => {
1161	assert_eq!(reset_to, None);
1162	reset_to = Some(i + T::char_len(c));
1163	if reset_from.is_none() {
1164	reset_from = Some(i);
1165	}
1166	}
1167	// Whitespace, isolate formatting
1168	WS \| FSI \| LRI \| RLI \| PDI => {
1169	if reset_from.is_none() {
1170	reset_from = Some(i);
1171	}
1172	}
1173	// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
1174	// same as above + set the level
1175	RLE \| LRE \| RLO \| LRO \| PDF \| BN => {
1176	if reset_from.is_none() {
1177	reset_from = Some(i);
1178	}
1179	// also set the level to previous
1180	for level in &mut line_levels[i..i + length] {
1181	*level = prev_level;
1182	}
1183	}
1184	_ => {
1185	reset_from = None;
1186	}
1187	}
1188	if let (Some(from), Some(to)) = (reset_from, reset_to) {
1189	for level in &mut line_levels[from..to] {
1190	*level = para_level;
1191	}
1192	reset_from = None;
1193	reset_to = None;
1194	}
1195	prev_level = line_levels[i];
1196	}
1197	if let Some(from) = reset_from {
1198	for level in &mut line_levels[from..] {
1199	*level = para_level;
1200	}
1201	}
1202	}
1203
1204	/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
1205	/// And it supports all operation in the `Paragraph` that needs also its
1206	/// `BidiInfo` such as `direction`.
1207	#[derive(Debug)]
1208	pub struct Paragraph<'a, 'text> {
1209	pub info: &'a BidiInfo<'text>,
1210	pub para: &'a ParagraphInfo,
1211	}
1212
1213	impl<'a, 'text> Paragraph<'a, 'text> {
1214	#[inline]
1215	pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
1216	Paragraph { info, para }
1217	}
1218
1219	/// Returns if the paragraph is Left direction, right direction or mixed.
1220	#[inline]
1221	pub fn direction(&self) -> Direction {
1222	para_direction(&self.info.levels[self.para.range.clone()])
1223	}
1224
1225	/// Returns the `Level` of a certain character in the paragraph.
1226	#[inline]
1227	pub fn level_at(&self, pos: usize) -> Level {
1228	let actual_position: usize = self.para.range.start + pos;
1229	self.info.levels[actual_position]
1230	}
1231	}
1232
1233	/// Return the directionality of the paragraph (Left, Right or Mixed) from its levels.
1234	#[cfg_attr(feature = "flame_it", flamer::flame)]
1235	fn para_direction(levels: &[Level]) -> Direction {
1236	let mut ltr = `false`;
1237	let mut rtl = `false`;
1238	for level in levels {
1239	if level.is_ltr() {
1240	ltr = `true`;
1241	if rtl {
1242	return Direction::Mixed;
1243	}
1244	}
1245
1246	if level.is_rtl() {
1247	rtl = `true`;
1248	if ltr {
1249	return Direction::Mixed;
1250	}
1251	}
1252	}
1253
1254	if ltr {
1255	return Direction::Ltr;
1256	}
1257
1258	Direction::Rtl
1259	}
1260
1261	/// Assign levels to characters removed by rule X9.
1262	///
1263	/// The levels assigned to these characters are not specified by the algorithm. This function
1264	/// assigns each one the level of the previous character, to avoid breaking level runs.
1265	#[cfg_attr(feature = "flame_it", flamer::flame)]
1266	fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
1267	for i: usize in `0`..levels.len() {
1268	if prepare::removed_by_x9(class:classes[i]) {
1269	levels[i] = if i > `0` { levels[i - `1`] } else { para_level };
1270	}
1271	}
1272	}
1273
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm.
///
/// See rules P2 and P3.
///
/// The first character with bidi character type L, R, or AL decides the result: type L
/// gives Direction::Ltr, type R or AL gives Direction::Rtl.
///
/// If no such character is found (outside of embedded isolate runs), Direction::Mixed is
/// returned; callers should read that as "neutral" or "unknown" rather than as genuinely
/// mixed directions.
///
/// This is a lightweight function for use when only the base direction is needed and no further
/// bidi processing of the text is needed.
///
/// If the text contains paragraph separators, this function considers only the first paragraph.
///
/// Bidi classes are looked up in the built-in `HardcodedBidiData`.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    let data_source = HardcodedBidiData;
    get_base_direction_with_data_source(&data_source, text)
}
1295
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm,
/// considering the full text if the first paragraph is all-neutral.
///
/// Unlike get_base_direction, this does not stop at the first block separator: it resets
/// the isolate nesting there and keeps looking for a strongly-directional character. The
/// result is therefore the base direction of the first paragraph that is not made up
/// purely of neutral characters.
///
/// Bidi classes are looked up in the built-in `HardcodedBidiData`.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction_full<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    let data_source = HardcodedBidiData;
    get_base_direction_full_with_data_source(&data_source, text)
}
1308
1309	#[inline]
1310	pub fn get_base_direction_with_data_source<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1311	data_source: &D,
1312	text: &'a T,
1313	) -> Direction {
1314	get_base_direction_impl(data_source, text, use_full_text:`false`)
1315	}
1316
1317	#[inline]
1318	pub fn get_base_direction_full_with_data_source<
1319	'a,
1320	D: BidiDataSource,
1321	T: TextSource<'a> + ?Sized,
1322	>(
1323	data_source: &D,
1324	text: &'a T,
1325	) -> Direction {
1326	get_base_direction_impl(data_source, text, use_full_text:`true`)
1327	}
1328
1329	fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1330	data_source: &D,
1331	text: &'a T,
1332	use_full_text: bool,
1333	) -> Direction {
1334	let mut isolate_level: i32 = `0`;
1335	for c: char in text.chars() {
1336	match data_source.bidi_class(c) {
1337	LRI \| RLI \| FSI => isolate_level += `1`,
1338	PDI if isolate_level > `0` => isolate_level -= `1`,
1339	L if isolate_level == `0` => return Direction::Ltr,
1340	R \| AL if isolate_level == `0` => return Direction::Rtl,
1341	B if !use_full_text => break,
1342	B if use_full_text => isolate_level = `0`,
1343	_ => (),
1344	}
1345	}
1346	// If no strong char was found, return Mixed. Normally this will be treated as Ltr by callers
1347	// (see rule P3), but we don't map this to Ltr here so that a caller that wants to apply other
1348	// heuristics to an all-neutral paragraph can tell the difference.
1349	Direction::Mixed
1350	}
1351
1352	/// Implementation of TextSource for UTF-8 text (a string slice).
1353	impl<'text> TextSource<'text> for str {
1354	type CharIter = core::str::Chars<'text>;
1355	type CharIndexIter = core::str::CharIndices<'text>;
1356	type IndexLenIter = Utf8IndexLenIter<'text>;
1357
1358	#[inline]
1359	fn len(&self) -> usize {
1360	(self as &str).len()
1361	}
1362	#[inline]
1363	fn char_at(&self, index: usize) -> Option<(char, usize)> {
1364	if let Some(slice) = self.get(index..) {
1365	if let Some(ch) = slice.chars().next() {
1366	return Some((ch, ch.len_utf8()));
1367	}
1368	}
1369	None
1370	}
1371	#[inline]
1372	fn subrange(&self, range: Range<usize>) -> &Self {
1373	&(self as &str)[range]
1374	}
1375	#[inline]
1376	fn chars(&'text self) -> Self::CharIter {
1377	(self as &str).chars()
1378	}
1379	#[inline]
1380	fn char_indices(&'text self) -> Self::CharIndexIter {
1381	(self as &str).char_indices()
1382	}
1383	#[inline]
1384	fn indices_lengths(&'text self) -> Self::IndexLenIter {
1385	Utf8IndexLenIter::new(self)
1386	}
1387	#[inline]
1388	fn char_len(ch: char) -> usize {
1389	ch.len_utf8()
1390	}
1391	}
1392
/// Iterator over (UTF-8) string slices returning (index, char_len) tuple.
///
/// `index` is the byte offset at which a character starts and `char_len` is the number of
/// bytes that character occupies in UTF-8.
#[derive(Debug)]
pub struct Utf8IndexLenIter<'text> {
    iter: CharIndices<'text>,
}

impl<'text> Utf8IndexLenIter<'text> {
    /// Creates an iterator over the characters of `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        Utf8IndexLenIter {
            iter: text.char_indices(),
        }
    }
}

impl Iterator for Utf8IndexLenIter<'_> {
    type Item = (usize, usize);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|(pos, ch)| (pos, ch.len_utf8()))
    }

    // One item is produced per underlying character, so the inner hint applies unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
1419
/// Test helper: encodes `s` as a vector of UTF-16 code units.
#[cfg(test)]
fn to_utf16(s: &str) -> Vec<u16> {
    let mut units = Vec::new();
    units.extend(s.encode_utf16());
    units
}
1424
1425	#[cfg(test)]
1426	#[cfg(feature = "hardcoded-data")]
1427	mod tests {
1428	use super::*;
1429
1430	use utf16::{
1431	BidiInfo as BidiInfoU16, InitialInfo as InitialInfoU16, Paragraph as ParagraphU16,
1432	ParagraphBidiInfo as ParagraphBidiInfoU16,
1433	};
1434
/// `char_at` on UTF-16 text: valid surrogate pairs decode to one character of length 2,
/// the index of a pair's second half yields `None`, and unpaired surrogates are reported
/// as U+FFFD with length 1.
#[test]
fn test_utf16_text_source() {
    let text: &[u16] =
        &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
    assert_eq!(text.char_at(0), Some(('A', 1)));
    assert_eq!(text.char_at(1), Some(('\u{10401}', 2)));
    assert_eq!(text.char_at(2), None);
    assert_eq!(text.char_at(3), Some((' ', 1)));
    assert_eq!(text.char_at(4), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(5), Some((' ', 1)));
    assert_eq!(text.char_at(6), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(7), Some((' ', 1)));
    assert_eq!(text.char_at(8), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(9), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(10), None);
}
1451
/// `chars()` on UTF-16 text: ten code units decode to nine characters, with every
/// unpaired surrogate replaced by U+FFFD.
#[test]
fn test_utf16_char_iter() {
    let text: &[u16] =
        &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
    assert_eq!(text.len(), 10);
    assert_eq!(text.chars().count(), 9);
    let mut chars = text.chars();
    assert_eq!(chars.next(), Some('A'));
    assert_eq!(chars.next(), Some('\u{10401}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), None);
}
1470
/// Checks the bidi classes and paragraph boundaries found by `InitialInfo::new`, for the
/// same text in UTF-8 and in UTF-16.
#[test]
fn test_initial_text_info() {
    let tests = vec![
        (
            // text
            "a1",
            // expected bidi classes per utf-8 byte
            vec![L, EN],
            // expected paragraph-info for utf-8
            vec![ParagraphInfo {
                range: 0..2,
                level: LTR_LEVEL,
            }],
            // expected bidi classes per utf-16 code unit
            vec![L, EN],
            // expected paragraph-info for utf-16
            vec![ParagraphInfo {
                range: 0..2,
                level: LTR_LEVEL,
            }],
        ),
        (
            // Arabic, space, Hebrew
            "\u{0639} \u{05D0}",
            vec![AL, AL, WS, R, R],
            vec![ParagraphInfo {
                range: 0..5,
                level: RTL_LEVEL,
            }],
            vec![AL, WS, R],
            vec![ParagraphInfo {
                range: 0..3,
                level: RTL_LEVEL,
            }],
        ),
        (
            // SMP characters from Kharoshthi, Cuneiform, Adlam:
            "\u{10A00}\u{12000}\u{1E900}",
            vec![R, R, R, R, L, L, L, L, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..12,
                level: RTL_LEVEL,
            }],
            vec![R, R, L, L, R, R],
            vec![ParagraphInfo {
                range: 0..6,
                level: RTL_LEVEL,
            }],
        ),
        (
            "a\u{2029}b",
            vec![L, B, B, B, L],
            vec![
                ParagraphInfo {
                    range: 0..4,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 4..5,
                    level: LTR_LEVEL,
                },
            ],
            vec![L, B, L],
            vec![
                ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 2..3,
                    level: LTR_LEVEL,
                },
            ],
        ),
        (
            "\u{2068}א\u{2069}a", // U+2068 FSI, U+2069 PDI
            vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
            vec![ParagraphInfo {
                range: 0..9,
                level: LTR_LEVEL,
            }],
            vec![RLI, R, PDI, L],
            vec![ParagraphInfo {
                range: 0..4,
                level: LTR_LEVEL,
            }],
        ),
    ];

    for t in tests {
        assert_eq!(
            InitialInfo::new(t.0, None),
            InitialInfo {
                text: t.0,
                original_classes: t.1,
                paragraphs: t.2,
            }
        );
        let text = &to_utf16(t.0);
        assert_eq!(
            InitialInfoU16::new(text, None),
            InitialInfoU16 {
                text,
                original_classes: t.3,
                paragraphs: t.4,
            }
        );
    }
}
1580
/// Checks the levels, classes and paragraphs computed by `BidiInfo::new` for UTF-8 and
/// UTF-16 input. Cases with no paragraph or exactly one paragraph are additionally run
/// through `ParagraphBidiInfo`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_process_text() {
    let tests = vec![
        (
            // text
            "",
            // base level
            Some(RTL_LEVEL),
            // levels
            Level::vec(&[]),
            // original_classes
            vec![],
            // paragraphs
            vec![],
            // levels_u16
            Level::vec(&[]),
            // original_classes_u16
            vec![],
            // paragraphs_u16
            vec![],
        ),
        (
            // text
            "abc123",
            // base level
            Some(LTR_LEVEL),
            // levels
            Level::vec(&[0, 0, 0, 0, 0, 0]),
            // original_classes
            vec![L, L, L, EN, EN, EN],
            // paragraphs
            vec![ParagraphInfo {
                range: 0..6,
                level: LTR_LEVEL,
            }],
            // levels_u16
            Level::vec(&[0, 0, 0, 0, 0, 0]),
            // original_classes_u16
            vec![L, L, L, EN, EN, EN],
            // paragraphs_u16
            vec![ParagraphInfo {
                range: 0..6,
                level: LTR_LEVEL,
            }],
        ),
        (
            "abc \u{05D0}\u{05D1}\u{05D2}",
            Some(LTR_LEVEL),
            Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..10,
                level: LTR_LEVEL,
            }],
            Level::vec(&[0, 0, 0, 0, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "abc \u{05D0}\u{05D1}\u{05D2}",
            Some(RTL_LEVEL),
            Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..10,
                level: RTL_LEVEL,
            }],
            Level::vec(&[2, 2, 2, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
        ),
        (
            "\u{05D0}\u{05D1}\u{05D2} abc",
            Some(LTR_LEVEL),
            Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
            vec![R, R, R, R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..10,
                level: LTR_LEVEL,
            }],
            Level::vec(&[1, 1, 1, 0, 0, 0, 0]),
            vec![R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "\u{05D0}\u{05D1}\u{05D2} abc",
            None,
            Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
            vec![R, R, R, R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..10,
                level: RTL_LEVEL,
            }],
            Level::vec(&[1, 1, 1, 1, 2, 2, 2]),
            vec![R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
        ),
        (
            "\u{063A}2\u{0638} \u{05D0}2\u{05D2}",
            Some(LTR_LEVEL),
            Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
            vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
            vec![ParagraphInfo {
                range: 0..11,
                level: LTR_LEVEL,
            }],
            Level::vec(&[1, 2, 1, 1, 1, 2, 1]),
            vec![AL, EN, AL, WS, R, EN, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "a א.\nג",
            None,
            Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
            vec![L, WS, R, R, CS, B, R, R],
            vec![
                ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 6..8,
                    level: RTL_LEVEL,
                },
            ],
            Level::vec(&[0, 0, 1, 0, 0, 1]),
            vec![L, WS, R, CS, B, R],
            vec![
                ParagraphInfo {
                    range: 0..5,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 5..6,
                    level: RTL_LEVEL,
                },
            ],
        ),
        // BidiTest:69635 (AL ET EN)
        (
            "\u{060B}\u{20CF}\u{06F9}",
            None,
            Level::vec(&[1, 1, 1, 1, 1, 2, 2]),
            vec![AL, AL, ET, ET, ET, EN, EN],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
            Level::vec(&[1, 1, 2]),
            vec![AL, ET, EN],
            vec![ParagraphInfo {
                range: 0..3,
                level: RTL_LEVEL,
            }],
        ),
    ];

    for (text, base_level, levels, classes, paras, levels_u16, classes_u16, paras_u16) in tests {
        assert_eq!(
            BidiInfo::new(text, base_level),
            BidiInfo {
                text,
                levels: levels.clone(),
                original_classes: classes.clone(),
                paragraphs: paras.clone(),
            }
        );
        // If it was empty, also test that ParagraphBidiInfo handles it safely.
        if paras.is_empty() {
            assert_eq!(
                ParagraphBidiInfo::new(text, base_level),
                ParagraphBidiInfo {
                    text,
                    original_classes: classes.clone(),
                    levels: levels.clone(),
                    paragraph_level: RTL_LEVEL,
                    is_pure_ltr: true,
                }
            );
        }
        // If it was a single paragraph, also test ParagraphBidiInfo.
        if paras.len() == 1 {
            assert_eq!(
                ParagraphBidiInfo::new(text, base_level),
                ParagraphBidiInfo {
                    text,
                    original_classes: classes.clone(),
                    levels: levels.clone(),
                    paragraph_level: paras[0].level,
                    is_pure_ltr: !level::has_rtl(&levels),
                }
            );
        }
        let text_u16 = &to_utf16(text);
        assert_eq!(
            BidiInfoU16::new(text_u16, base_level),
            BidiInfoU16 {
                text: text_u16,
                levels: levels_u16.clone(),
                original_classes: classes_u16.clone(),
                paragraphs: paras_u16.clone(),
            }
        );
        if paras_u16.len() == 1 {
            assert_eq!(
                ParagraphBidiInfoU16::new(text_u16, base_level),
                ParagraphBidiInfoU16 {
                    text: text_u16,
                    original_classes: classes_u16.clone(),
                    levels: levels_u16.clone(),
                    paragraph_level: paras_u16[0].level,
                    is_pure_ltr: !level::has_rtl(&levels_u16),
                }
            );
        }
    }
}
1814
/// Checks `ParagraphBidiInfo::new` (UTF-8 and UTF-16) on text containing an embedded
/// paragraph break or tab.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_paragraph_bidi_info() {
    // Passing text that includes a paragraph break to the ParagraphBidiInfo API:
    // this is a misuse of the API by the client, but our behavior is safe &
    // consistent. The embedded paragraph break acts like a separator (tab) would.
    let tests = vec![
        (
            "a א.\nג",
            None,
            // utf-8 results:
            vec![L, WS, R, R, CS, B, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            // utf-16 results:
            vec![L, WS, R, CS, B, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            // paragraph level; is_pure_ltr
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\nb.",
            None,
            // utf-8 results:
            vec![R, R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            // utf-16 results:
            vec![R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            // paragraph level; is_pure_ltr
            RTL_LEVEL,
            false,
        ),
        (
            "a א.\tג",
            None,
            // utf-8 results:
            vec![L, WS, R, R, CS, S, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            // utf-16 results:
            vec![L, WS, R, CS, S, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            // paragraph level; is_pure_ltr
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\tb.",
            None,
            // utf-8 results:
            vec![R, R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            // utf-16 results:
            vec![R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            // paragraph level; is_pure_ltr
            RTL_LEVEL,
            false,
        ),
    ];

    for (text, base_level, classes, levels, classes_u16, levels_u16, para_level, pure_ltr) in tests
    {
        assert_eq!(
            ParagraphBidiInfo::new(text, base_level),
            ParagraphBidiInfo {
                text,
                original_classes: classes,
                levels,
                paragraph_level: para_level,
                is_pure_ltr: pure_ltr,
            }
        );
        let text_u16 = &to_utf16(text);
        assert_eq!(
            ParagraphBidiInfoU16::new(text_u16, base_level),
            ParagraphBidiInfoU16 {
                text: text_u16,
                original_classes: classes_u16,
                levels: levels_u16,
                paragraph_level: para_level,
                is_pure_ltr: pure_ltr,
            }
        );
    }
}
1900
/// Checks `has_rtl()` on `BidiInfo` (UTF-8 and UTF-16) for ASCII-only text and for text
/// containing Hebrew, with and without an explicit base level.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_bidi_info_has_rtl() {
    let tests = vec![
        // ASCII only
        ("123", None, false),
        ("123", Some(LTR_LEVEL), false),
        ("123", Some(RTL_LEVEL), false),
        ("abc", None, false),
        ("abc", Some(LTR_LEVEL), false),
        ("abc", Some(RTL_LEVEL), false),
        ("abc 123", None, false),
        ("abc\n123", None, false),
        // With Hebrew
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(LTR_LEVEL), true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(RTL_LEVEL), true),
        ("abc \u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("abc\n\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} abc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\nabc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} 123", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\n123", None, true),
    ];

    for (text, base_level, expected) in tests {
        assert_eq!(BidiInfo::new(text, base_level).has_rtl(), expected);
        assert_eq!(
            BidiInfoU16::new(&to_utf16(text), base_level).has_rtl(),
            expected
        );
    }
}
1931
/// Test helper: analyses `text` with an auto-detected base level and returns every
/// paragraph reordered as a single line spanning the whole paragraph.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
    let bidi_info = BidiInfo::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reorder_line(para, para.range.clone()))
        .collect()
}
1941
/// Test helper: UTF-16 counterpart of `reorder_paras`. Analyses `text` with an
/// auto-detected paragraph level and returns each paragraph reordered as a
/// single line spanning the whole paragraph.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras_u16(text: &[u16]) -> Vec<Cow<'_, [u16]>> {
    let info = BidiInfoU16::new(text, None);
    let mut lines = Vec::with_capacity(info.paragraphs.len());
    for paragraph in info.paragraphs.iter() {
        lines.push(info.reorder_line(paragraph, paragraph.range.clone()));
    }
    lines
}
1951
/// Checks visual reordering of whole paragraphs. Each case pairs a logical
/// input with the expected display string of every paragraph, and is run
/// through both the UTF-8 and the UTF-16 helpers.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reorder_line() {
    let cases = vec![
        // Bidi_Class: L L L B L L L B L L L
        ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
        // Bidi_Class: L L EN B L L EN B L L EN
        ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
        // Bidi_Class: L L L B AL AL AL
        ("abc\nابج", vec!["abc\n", "جبا"]),
        // Bidi_Class: AL AL AL B L L L
        (
            "\u{0627}\u{0628}\u{062C}\nabc",
            vec!["\n\u{062C}\u{0628}\u{0627}", "abc"],
        ),
        ("1.-2", vec!["1.-2"]),
        ("1-.2", vec!["1-.2"]),
        ("abc אבג", vec!["abc גבא"]),
        // Numbers being weak LTR characters, cannot reorder strong RTL
        ("123 \u{05D0}\u{05D1}\u{05D2}", vec!["גבא 123"]),
        ("abc\u{202A}def", vec!["abc\u{202A}def"]),
        (
            "abc\u{202A}def\u{202C}ghi",
            vec!["abc\u{202A}def\u{202C}ghi"],
        ),
        (
            "abc\u{2066}def\u{2069}ghi",
            vec!["abc\u{2066}def\u{2069}ghi"],
        ),
        // Testing for RLE Character
        ("\u{202B}abc אבג\u{202C}", vec!["\u{202b}גבא abc\u{202c}"]),
        // Testing neutral characters
        ("\u{05D0}בג? אבג", vec!["גבא ?גבא"]),
        // Testing neutral characters with special case
        ("A אבג?", vec!["A גבא?"]),
        // Testing neutral characters with Implicit RTL Marker
        ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
        ("\u{05D0}בג abc", vec!["abc גבא"]),
        ("abc\u{2067}.-\u{2069}ghi", vec!["abc\u{2067}-.\u{2069}ghi"]),
        (
            "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
        ),
        // With mirrorable characters in RTL run
        ("\u{05D0}(ב)ג.", vec![".ג)ב(א"]),
        // With mirrorable characters on level boundary
        ("\u{05D0}ב(גד[&ef].)gh", vec!["gh).]ef&[דג(בא"]),
    ];

    for (logical, visual) in cases {
        // UTF-8 path.
        assert_eq!(reorder_paras(logical), visual);

        // UTF-16 path: same expectation, re-encoded paragraph by paragraph.
        let logical_utf16 = to_utf16(logical);
        let visual_utf16 = visual
            .iter()
            .map(|line| to_utf16(line))
            .collect::<Vec<_>>();
        assert_eq!(reorder_paras_u16(&logical_utf16), visual_utf16);
    }
}
2007
2008	fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
2009	let bidi_info = BidiInfo::new(text, None);
2010	bidi_info
2011	.paragraphs
2012	.iter()
2013	.map(\|para\| bidi_info.reordered_levels(para, para.range.clone()))
2014	.collect()
2015	}
2016
2017	fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
2018	let bidi_info = BidiInfo::new(text, None);
2019	bidi_info
2020	.paragraphs
2021	.iter()
2022	.map(\|para\| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2023	.collect()
2024	}
2025
2026	fn reordered_levels_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2027	let bidi_info = BidiInfoU16::new(text, None);
2028	bidi_info
2029	.paragraphs
2030	.iter()
2031	.map(\|para\| bidi_info.reordered_levels(para, para.range.clone()))
2032	.collect()
2033	}
2034
2035	fn reordered_levels_per_char_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2036	let bidi_info = BidiInfoU16::new(text, None);
2037	bidi_info
2038	.paragraphs
2039	.iter()
2040	.map(\|para\| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2041	.collect()
2042	}
2043
/// Regression test (see issue #138): every run returned by `visual_runs` for a
/// sub-range of a paragraph must be usable to slice the original string.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels_range() {
    // Each of U+202A and U+202C takes three bytes in UTF-8, so the 14-byte
    // string below is laid out as [0..3) [3..4) [4..7) [7..10) [10..11) [11..14).
    // The line 4..11 therefore starts and ends on character boundaries and
    // covers "\u{202c}\u{202a}A".
    let s = "\u{202a}A\u{202c}\u{202a}A\u{202c}";
    let line = 4..11;
    assert!(s.get(line.clone()).is_some());

    let info = BidiInfo::new(s, None);
    let (_, runs) = info.visual_runs(&info.paragraphs[0], line);

    // Indexing panics if a run is out of bounds or splits a character, which
    // is exactly the failure this test guards against.
    runs.into_iter().for_each(|run| {
        let _ = &s[run];
    });
}
2060
/// Checks `reordered_levels` and `reordered_levels_per_char` for whole
/// paragraphs (UTF-8 and UTF-16), then `reordered_levels` for a line that is
/// narrower than its paragraph.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels() {
    // Case layout: (text, expected `reordered_levels` for UTF-8,
    // expected per-char levels, expected `reordered_levels` for UTF-16).
    let numeric_cases = vec![
        // BidiTest:946 (LRI PDI)
        // NOTE(review): the first code point is U+2067, which the
        // BidiTest:291284 case further down labels RLI — confirm this label.
        (
            "\u{2067}\u{2069}",
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])],
            vec![Level::vec(&[0, 0])],
            vec![Level::vec(&[0, 0])],
        ),
        // BidiTest:69635 (AL ET EN)
        (
            "\u{060B}\u{20CF}\u{06F9}",
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])],
            vec![Level::vec(&[1, 1, 2])],
            vec![Level::vec(&[1, 1, 2])],
        ),
    ];

    for (text, utf8_levels, char_levels, utf16_levels) in numeric_cases {
        assert_eq!(reordered_levels_for_paras(text), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(text), char_levels);

        let utf16 = to_utf16(text);
        assert_eq!(reordered_levels_for_paras_u16(&utf16), utf16_levels);
        assert_eq!(reordered_levels_per_char_for_paras_u16(&utf16), char_levels);
    }

    // Same layout, but the expectations are spelled as strings (including "x"
    // entries), so these comparisons rely on `Level` being comparable to `&str`.
    let string_cases = vec![
        // BidiTest:291284 (AN RLI PDF R)
        (
            "\u{0605}\u{2067}\u{202C}\u{0590}",
            vec![&["2", "2", "0", "0", "0", "x", "x", "x", "1", "1"]],
            vec![&["2", "0", "x", "1"]],
            vec![&["2", "0", "x", "1"]],
        ),
    ];

    for (text, utf8_levels, char_levels, utf16_levels) in string_cases {
        assert_eq!(reordered_levels_for_paras(text), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(text), char_levels);

        let utf16 = to_utf16(text);
        assert_eq!(reordered_levels_for_paras_u16(&utf16), utf16_levels);
        assert_eq!(reordered_levels_per_char_for_paras_u16(&utf16), char_levels);
    }

    // A line narrower than the paragraph: the expected vectors still have one
    // entry per unit of the whole paragraph (7 for UTF-8, 5 for UTF-16).
    let mixed = "aa טֶ";
    let info = BidiInfo::new(mixed, None);
    assert_eq!(
        info.reordered_levels(&info.paragraphs[0], 3..7),
        Level::vec(&[0, 0, 0, 1, 1, 1, 1]),
    );

    let mixed_utf16 = to_utf16(mixed);
    let info = BidiInfoU16::new(&mixed_utf16, None);
    assert_eq!(
        info.reordered_levels(&info.paragraphs[0], 1..4),
        Level::vec(&[0, 0, 0, 1, 1]),
    );
}
2121
/// Checks `ParagraphInfo::len()` for a single paragraph and for two paragraphs
/// joined by a newline, in both UTF-8 and UTF-16.
#[test]
fn test_paragraph_info_len() {
    let first = "hello world";
    let second = "How are you";
    let joined = format!("{}\n{}", first, second);

    // --- UTF-8 ---
    let single = BidiInfo::new(first, None);
    assert_eq!(single.paragraphs.len(), 1);
    assert_eq!(single.paragraphs[0].len(), first.len());

    let double = BidiInfo::new(&joined, None);
    assert_eq!(double.paragraphs.len(), 2);

    // The first paragraph includes the paragraph separator.
    // TODO: investigate if the paragraph separator character
    // should not be part of any paragraph.
    assert_eq!(double.paragraphs[0].len(), first.len() + 1);
    assert_eq!(double.paragraphs[1].len(), second.len());

    // --- UTF-16 ---
    let first_utf16 = to_utf16(first);
    let second_utf16 = to_utf16(second);
    let joined_utf16 = to_utf16(&joined);

    let single = BidiInfoU16::new(&first_utf16, None);
    assert_eq!(single.paragraphs.len(), 1);
    assert_eq!(single.paragraphs[0].len(), first_utf16.len());

    let double = BidiInfoU16::new(&joined_utf16, None);
    assert_eq!(double.paragraphs.len(), 2);

    // As above, the separator is counted in the first paragraph.
    assert_eq!(double.paragraphs[0].len(), first_utf16.len() + 1);
    assert_eq!(double.paragraphs[1].len(), second_utf16.len());
}
2153
/// Checks `direction()` on three paragraphs: pure LTR, pure RTL, and one that
/// concatenates both, through `Paragraph` and `ParagraphU16`.
#[test]
fn test_direction() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    // Paragraph 0: LTR only; paragraph 1: RTL only; paragraph 2: LTR followed by RTL.
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);

    // UTF-8
    let info = BidiInfo::new(&combined, None);
    assert_eq!(info.paragraphs.len(), 3);
    let ltr_para = Paragraph::new(&info, &info.paragraphs[0]);
    let rtl_para = Paragraph::new(&info, &info.paragraphs[1]);
    let mixed_para = Paragraph::new(&info, &info.paragraphs[2]);
    assert_eq!(ltr_para.direction(), Direction::Ltr);
    assert_eq!(rtl_para.direction(), Direction::Rtl);
    assert_eq!(mixed_para.direction(), Direction::Mixed);

    // UTF-16
    let combined_utf16 = to_utf16(&combined);
    let info = BidiInfoU16::new(&combined_utf16, None);
    assert_eq!(info.paragraphs.len(), 3);
    let ltr_para = ParagraphU16::new(&info, &info.paragraphs[0]);
    let rtl_para = ParagraphU16::new(&info, &info.paragraphs[1]);
    let mixed_para = ParagraphU16::new(&info, &info.paragraphs[2]);
    assert_eq!(ltr_para.direction(), Direction::Ltr);
    assert_eq!(rtl_para.direction(), Direction::Rtl);
    assert_eq!(mixed_para.direction(), Direction::Mixed);
}
2178
/// Edge cases for paragraph detection and direction: empty text yields no
/// paragraphs, and a paragraph made only of a separator takes its direction
/// from the requested (or default) paragraph level. Both the UTF-8 and the
/// UTF-16 APIs are checked with identical assertions.
#[test]
fn test_edge_cases_direction() {
    // No paragraphs for empty text.
    let empty = "";
    let bidi_info = BidiInfo::new(empty, Some(RTL_LEVEL));
    assert_eq!(bidi_info.paragraphs.len(), 0);

    let empty = &to_utf16(empty);
    let bidi_info = BidiInfoU16::new(empty, Some(RTL_LEVEL));
    assert_eq!(bidi_info.paragraphs.len(), 0);

    let tests = vec![
        // The paragraph separator will take the value of the default direction
        // which is left to right.
        ("\n", None, Direction::Ltr),
        // The paragraph separator will take the value of the given initial direction
        // which is left to right.
        ("\n", Some(LTR_LEVEL), Direction::Ltr),
        // The paragraph separator will take the value of the given initial direction
        // which is right to left.
        ("\n", Some(RTL_LEVEL), Direction::Rtl),
    ];

    for (text, level, expected) in tests {
        let bidi_info = BidiInfo::new(text, level);
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), expected);

        let text = &to_utf16(text);
        let bidi_info = BidiInfoU16::new(text, level);
        // Check the count before indexing, as in the UTF-8 branch, so a
        // regression shows up as a clear assertion failure rather than an
        // index-out-of-bounds panic.
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), expected);
    }
}
2213
/// Checks `level_at()` on an LTR paragraph, an RTL paragraph, and a paragraph
/// that starts LTR and continues RTL, in both UTF-8 and UTF-16.
#[test]
fn test_level_at() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);

    // --- UTF-8 ---
    // `ltr_text` is 11 bytes; `rtl_text` is 7 two-byte letters plus a space = 15 bytes.
    let info = BidiInfo::new(&combined, None);
    assert_eq!(info.paragraphs.len(), 3);

    let ltr_para = Paragraph::new(&info, &info.paragraphs[0]);
    let rtl_para = Paragraph::new(&info, &info.paragraphs[1]);
    let mixed_para = Paragraph::new(&info, &info.paragraphs[2]);

    assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
    assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    // 54 = (11 + 1) + (15 + 1) + (11 + 15): one level per byte of the whole text.
    assert_eq!(mixed_para.info.levels.len(), 54);
    // 28 = (11 + 1) + (15 + 1): the third paragraph starts after the first two.
    assert_eq!(mixed_para.para.range.start, 28);
    // The offset is relative to the paragraph: right after the LTR prefix
    // comes the first RTL character.
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);

    // --- UTF-16 ---
    // `ltr_text` is 11 code units; `rtl_text` is 8 code units.
    let combined_utf16 = to_utf16(&combined);
    let info = BidiInfoU16::new(&combined_utf16, None);
    assert_eq!(info.paragraphs.len(), 3);

    let ltr_para = ParagraphU16::new(&info, &info.paragraphs[0]);
    let rtl_para = ParagraphU16::new(&info, &info.paragraphs[1]);
    let mixed_para = ParagraphU16::new(&info, &info.paragraphs[2]);

    assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
    assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    // 40 = (11 + 1) + (8 + 1) + (11 + 8).
    assert_eq!(mixed_para.info.levels.len(), 40);
    // 21 = (11 + 1) + (8 + 1).
    assert_eq!(mixed_para.para.range.start, 21);
    // `ltr_text` is ASCII, so its byte length is also its UTF-16 length.
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
}
2248
/// Checks `get_base_direction` on `&str` and on UTF-16 input against the same
/// table of (text, expected direction) pairs.
#[test]
fn test_get_base_direction() {
    let cases = vec![
        ("", Direction::Mixed), // return Mixed if no strong character found
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        ("3.14\npi", Direction::Mixed), // only first paragraph is considered
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl), // embedded isolate is ignored
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
    ];

    for (text, expected) in cases {
        assert_eq!(get_base_direction(text), expected);

        let utf16 = to_utf16(text);
        assert_eq!(get_base_direction(utf16.as_slice()), expected);
    }
}
2267
/// Checks `get_base_direction_full` on `&str` and on UTF-16 input. Unlike the
/// `get_base_direction` cases, the expectations here show later paragraphs
/// being taken into account.
#[test]
fn test_get_base_direction_full() {
    let cases = vec![
        ("", Direction::Mixed), // return Mixed if no strong character found
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        ("3.14\npi", Direction::Ltr), // direction taken from the second paragraph
        ("3.14\n\u{05D0}", Direction::Rtl), // direction taken from the second paragraph
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl), // embedded isolate is ignored
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
        ("[123 '\u{2066}abc\u{2068}'\n\u{0628}]", Direction::Rtl), // \n resets embedding level
    ];

    for (text, expected) in cases {
        assert_eq!(get_base_direction_full(text), expected);

        let utf16 = to_utf16(text);
        assert_eq!(get_base_direction_full(utf16.as_slice()), expected);
    }
}
2288	}
2289
// Serialization tests; only built when both the `serde` and `hardcoded-data`
// features are enabled.
#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// The resolved levels must round-trip through serde as a sequence of
    /// `Level` newtype structs, each wrapping a `u8`.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);

        // One level per byte: four bytes at level 0 followed by six at level 1.
        let expected_levels: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        let mut expected_tokens = vec![Token::Seq { len: Some(10) }];
        for &level in expected_levels.iter() {
            expected_tokens.push(Token::NewtypeStruct { name: "Level" });
            expected_tokens.push(Token::U8(level));
        }
        expected_tokens.push(Token::SeqEnd);

        assert_tokens(&levels, &expected_tokens);
    }
}
2331