lib.rs source code [crates/unicode-bidi-0.3.15/src/lib.rs]

1	// Copyright 2015 The Servo Project Developers. See the
2	// COPYRIGHT file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	//! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed
11	//! right-to-left and left-to-right text. It is written in safe Rust, compatible with the
12	//! current stable release.
13	//!
14	//! ## Example
15	//!
16	//! ```rust
17	//! # #[cfg(feature = "hardcoded-data")] {
18	//! use unicode_bidi::BidiInfo;
19	//!
20	//! // This example text is defined using `concat!` because some browsers
21	//! // and text editors have trouble displaying bidi strings.
22	//! let text = concat![
23	//! "א",
24	//! "ב",
25	//! "ג",
26	//! "a",
27	//! "b",
28	//! "c",
29	//! ];
30	//!
31	//! // Resolve embedding levels within the text. Pass `None` to detect the
32	//! // paragraph level automatically.
33	//! let bidi_info = BidiInfo::new(&text, None);
34	//!
35	//! // This paragraph has embedding level 1 because its first strong character is RTL.
//! assert_eq!(bidi_info.paragraphs.len(), 1);
//! let para = &bidi_info.paragraphs[0];
//! assert_eq!(para.level.number(), 1);
//! assert_eq!(para.level.is_rtl(), true);
40	//!
41	//! // Re-ordering is done after wrapping each paragraph into a sequence of
42	//! // lines. For this example, I'll just use a single line that spans the
43	//! // entire paragraph.
44	//! let line = para.range.clone();
45	//!
46	//! let display = bidi_info.reorder_line(para, line);
47	//! assert_eq!(display, concat![
48	//! "a",
49	//! "b",
50	//! "c",
51	//! "ג",
52	//! "ב",
53	//! "א",
54	//! ]);
55	//! # } // feature = "hardcoded-data"
56	//! ```
57	//!
58	//! # Features
59	//!
60	//! - `std`: Enabled by default, but can be disabled to make `unicode_bidi`
61	//! `#![no_std]` + `alloc` compatible.
62	//! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs.
63	//! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`]
64	//! implementations to relevant types.
65	//!
66	//! [tr9]: <http://www.unicode.org/reports/tr9/>
67
68	#![no_std]
69	// We need to link to std to make doc tests work on older Rust versions
70	#[cfg(feature = "std")]
71	extern crate std;
72	#[macro_use]
73	extern crate alloc;
74
75	pub mod data_source;
76	pub mod deprecated;
77	pub mod format_chars;
78	pub mod level;
79	pub mod utf16;
80
81	mod char_data;
82	mod explicit;
83	mod implicit;
84	mod prepare;
85
86	pub use crate::char_data::{BidiClass, UNICODE_VERSION};
87	pub use crate::data_source::BidiDataSource;
88	pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
89	pub use crate::prepare::LevelRun;
90
91	#[cfg(feature = "hardcoded-data")]
92	pub use crate::char_data::{bidi_class, HardcodedBidiData};
93
94	use alloc::borrow::Cow;
95	use alloc::string::String;
96	use alloc::vec::Vec;
97	use core::char;
98	use core::cmp;
99	use core::iter::repeat;
100	use core::ops::Range;
101	use core::str::CharIndices;
102
103	use crate::format_chars as chars;
104	use crate::BidiClass::*;
105
/// Trait that abstracts over a text source for use by the bidi algorithms.
/// We implement this for str (UTF-8) and for [u16] (UTF-16, native-endian).
/// (For internal unicode-bidi use; API may be unstable.)
/// This trait is sealed and cannot be implemented for types outside this crate.
///
/// Every index and length that crosses this interface is expressed in code units
/// of the underlying text rather than in characters.
pub trait TextSource<'text>: private::Sealed {
    /// Iterator type produced by `chars()`.
    type CharIter: Iterator<Item = char>;
    /// Iterator type produced by `char_indices()`.
    type CharIndexIter: Iterator<Item = (usize, char)>;
    /// Iterator type produced by `indices_lengths()`.
    type IndexLenIter: Iterator<Item = (usize, usize)>;

    /// Return the length of the text in code units.
    #[doc(hidden)]
    fn len(&self) -> usize;

    /// Get the character at a given code unit index, along with its length in code units.
    /// Returns None if index is out of range, or points inside a multi-code-unit character.
    /// Returns REPLACEMENT_CHARACTER for any unpaired surrogates in UTF-16.
    #[doc(hidden)]
    fn char_at(&self, index: usize) -> Option<(char, usize)>;

    /// Return a subrange of the text, indexed by code units.
    /// (We don't implement all of the Index trait, just the minimum we use.)
    #[doc(hidden)]
    fn subrange(&self, range: Range<usize>) -> &Self;

    /// An iterator over the text returning Unicode characters,
    /// REPLACEMENT_CHAR for invalid code units.
    #[doc(hidden)]
    fn chars(&'text self) -> Self::CharIter;

    /// An iterator over the text returning (index, char) tuples,
    /// where index is the starting code-unit index of the character,
    /// and char is its Unicode value (or REPLACEMENT_CHAR if invalid).
    #[doc(hidden)]
    fn char_indices(&'text self) -> Self::CharIndexIter;

    /// An iterator over the text returning (index, length) tuples,
    /// where index is the starting code-unit index of the character,
    /// and length is its length in code units.
    #[doc(hidden)]
    fn indices_lengths(&'text self) -> Self::IndexLenIter;

    /// Number of code units the given character uses.
    ///
    /// This is an associated function (no `self`): the answer depends only on the
    /// implementing text type and the character.
    #[doc(hidden)]
    fn char_len(ch: char) -> usize;
}
151
/// Non-public module holding the `Sealed` supertrait of `TextSource`.
///
/// `TextSource` requires `private::Sealed`, and the only implementations of
/// `Sealed` are the ones listed in this module.
mod private {
    /// Marker supertrait of `TextSource`; it declares no methods.
    pub trait Sealed {}

    // Implement for str and [u16] only.
    impl Sealed for str {}
    impl Sealed for [u16] {}
}
159
/// Overall direction of a piece of text.
///
/// This is the value returned by `ParagraphBidiInfo::direction()`, which derives it
/// from the paragraph's resolved levels.
///
/// The type is a plain fieldless enum, so it is `Copy` and totally comparable.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Direction {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
    /// Neither purely left-to-right nor purely right-to-left.
    Mixed,
}
166
/// Bidi information about a single paragraph
#[derive(Clone, Debug, PartialEq)]
pub struct ParagraphInfo {
    /// The paragraph's boundaries within the text, as byte indices
    /// (more generally: code-unit indices of the analysed text source).
    ///
    /// When the text is split into paragraphs, the paragraph separator is kept
    /// with the paragraph it terminates, so it lies inside this range.
    ///
    /// TODO: Shrink this to only include the starting index?
    pub range: Range<usize>,

    /// The paragraph embedding level.
    ///
    /// <http://www.unicode.org/reports/tr9/#BD4>
    pub level: Level,
}
180
181	impl ParagraphInfo {
182	/// Gets the length of the paragraph in the source text.
183	pub fn len(&self) -> usize {
184	self.range.end - self.range.start
185	}
186	}
187
/// Initial bidi information of the text.
///
/// Contains the text paragraphs and `BidiClass` of its characters.
///
/// This is the result of the first analysis pass only; it carries no resolved
/// embedding levels per character.
#[derive(PartialEq, Debug)]
pub struct InitialInfo<'text> {
    /// The text
    pub text: &'text str,

    /// The BidiClass of the character at each byte in the text.
    /// If a character is multiple bytes, its class will appear multiple times in the vector.
    pub original_classes: Vec<BidiClass>,

    /// The boundaries and level of each paragraph within the text.
    pub paragraphs: Vec<ParagraphInfo>,
}
203
204	impl<'text> InitialInfo<'text> {
205	/// Find the paragraphs and BidiClasses in a string of text.
206	///
207	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
208	///
209	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
210	/// character is found before the matching PDI. If no strong character is found, the class will
211	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
212	///
213	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
214	#[cfg_attr(feature = "flame_it", flamer::flame)]
215	#[cfg(feature = "hardcoded-data")]
216	pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> {
217	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
218	}
219
220	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
221	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
222	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
223	///
224	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
225	///
226	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
227	/// character is found before the matching PDI. If no strong character is found, the class will
228	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
229	#[cfg_attr(feature = "flame_it", flamer::flame)]
230	pub fn new_with_data_source<'a, D: BidiDataSource>(
231	data_source: &D,
232	text: &'a str,
233	default_para_level: Option<Level>,
234	) -> InitialInfo<'a> {
235	InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
236	}
237	}
238
/// Extended version of InitialInfo (not public API).
///
/// Pairs the public `InitialInfo` with one extra flag per paragraph that is only
/// of interest inside the crate.
#[derive(PartialEq, Debug)]
struct InitialInfoExt<'text> {
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
    base: InitialInfo<'text>,

    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
    /// control codes present).
    pure_ltr: Vec<bool>,
}
250
251	impl<'text> InitialInfoExt<'text> {
252	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
253	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
254	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
255	///
256	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
257	///
258	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
259	/// character is found before the matching PDI. If no strong character is found, the class will
260	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
261	#[cfg_attr(feature = "flame_it", flamer::flame)]
262	pub fn new_with_data_source<'a, D: BidiDataSource>(
263	data_source: &D,
264	text: &'a str,
265	default_para_level: Option<Level>,
266	) -> InitialInfoExt<'a> {
267	let mut paragraphs = Vec::<ParagraphInfo>::new();
268	let mut pure_ltr = Vec::<bool>::new();
269	let (original_classes, _, _) = compute_initial_info(
270	data_source,
271	text,
272	default_para_level,
273	Some((&mut paragraphs, &mut pure_ltr)),
274	);
275
276	InitialInfoExt {
277	base: InitialInfo {
278	text,
279	original_classes,
280	paragraphs,
281	},
282	pure_ltr,
283	}
284	}
285	}
286
287	/// Implementation of initial-info computation for both BidiInfo and ParagraphBidiInfo.
288	/// To treat the text as (potentially) multiple paragraphs, the caller should pass the
289	/// pair of optional outparam arrays to receive the ParagraphInfo and pure-ltr flags
290	/// for each paragraph. Passing None for split_paragraphs will ignore any paragraph-
291	/// separator characters in the text, treating it just as a single paragraph.
292	/// Returns the array of BidiClass values for each code unit of the text, along with
293	/// the embedding level and pure-ltr flag for the last* (or only) paragraph.*
294	fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
295	data_source: &D,
296	text: &'a T,
297	default_para_level: Option<Level>,
298	mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>,
299	) -> (Vec<BidiClass>, Level, bool) {
300	let mut original_classes = Vec::with_capacity(text.len());
301
302	// The stack contains the starting code unit index for each nested isolate we're inside.
303	let mut isolate_stack = Vec::new();
304
305	debug_assert!(
306	if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs {
307	paragraphs.is_empty() && pure_ltr.is_empty()
308	} else {
309	`true`
310	}
311	);
312
313	let mut para_start = `0`;
314	let mut para_level = default_para_level;
315
316	// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
317	// RTL characters or bidi control characters are encountered in the paragraph.
318	let mut is_pure_ltr = `true`;
319
320	#[cfg(feature = "flame_it")]
321	flame::start("compute_initial_info(): iter text.char_indices()");
322
323	for (i, c) in text.char_indices() {
324	let class = data_source.bidi_class(c);
325
326	#[cfg(feature = "flame_it")]
327	flame::start("original_classes.extend()");
328
329	let len = T::char_len(c);
330	original_classes.extend(repeat(class).take(len));
331
332	#[cfg(feature = "flame_it")]
333	flame::end("original_classes.extend()");
334
335	match class {
336	B => {
337	if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs {
338	// P1. Split the text into separate paragraphs. The paragraph separator is kept
339	// with the previous paragraph.
340	let para_end = i + len;
341	paragraphs.push(ParagraphInfo {
342	range: para_start..para_end,
343	// P3. If no character is found in p2, set the paragraph level to zero.
344	level: para_level.unwrap_or(LTR_LEVEL),
345	});
346	pure_ltr.push(is_pure_ltr);
347	// Reset state for the start of the next paragraph.
348	para_start = para_end;
349	// TODO: Support defaulting to direction of previous paragraph
350	//
351	// <http://www.unicode.org/reports/tr9/#HL1>
352	para_level = default_para_level;
353	is_pure_ltr = `true`;
354	isolate_stack.clear();
355	}
356	}
357
358	L \| R \| AL => {
359	if class != L {
360	is_pure_ltr = `false`;
361	}
362	match isolate_stack.last() {
363	Some(&start) => {
364	if original_classes[start] == FSI {
365	// X5c. If the first strong character between FSI and its matching
366	// PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
367	for j in `0`..T::char_len(chars::FSI) {
368	original_classes[start + j] = if class == L { LRI } else { RLI };
369	}
370	}
371	}
372
373	None => {
374	if para_level.is_none() {
375	// P2. Find the first character of type L, AL, or R, while skipping
376	// any characters between an isolate initiator and its matching
377	// PDI.
378	para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
379	}
380	}
381	}
382	}
383
384	AN \| LRE \| RLE \| LRO \| RLO => {
385	is_pure_ltr = `false`;
386	}
387
388	RLI \| LRI \| FSI => {
389	is_pure_ltr = `false`;
390	isolate_stack.push(i);
391	}
392
393	PDI => {
394	isolate_stack.pop();
395	}
396
397	_ => {}
398	}
399	}
400
401	if let Some((paragraphs, pure_ltr)) = split_paragraphs {
402	if para_start < text.len() {
403	paragraphs.push(ParagraphInfo {
404	range: para_start..text.len(),
405	level: para_level.unwrap_or(LTR_LEVEL),
406	});
407	pure_ltr.push(is_pure_ltr);
408	}
409	debug_assert_eq!(paragraphs.len(), pure_ltr.len());
410	}
411	debug_assert_eq!(original_classes.len(), text.len());
412
413	#[cfg(feature = "flame_it")]
414	flame::end("compute_initial_info(): iter text.char_indices()");
415
416	(
417	original_classes,
418	para_level.unwrap_or(LTR_LEVEL),
419	is_pure_ltr,
420	)
421	}
422
/// Bidi information of the text.
///
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
/// character is multiple bytes wide, then its class and level will appear multiple times in these
/// vectors.
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
    /// The text
    pub text: &'text str,

    /// The BidiClass of the character at each byte in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each byte in the text.
    pub levels: Vec<Level>,

    /// The boundaries and paragraph embedding level of each paragraph within the text.
    ///
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
    /// Or just don't include the first paragraph, which always starts at 0?
    pub paragraphs: Vec<ParagraphInfo>,
}
446
447	impl<'text> BidiInfo<'text> {
448	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
449	///
450	///
451	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
452	///
453	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
454	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
455	///
456	/// TODO: Support auto-RTL base direction
457	#[cfg_attr(feature = "flame_it", flamer::flame)]
458	#[cfg(feature = "hardcoded-data")]
459	#[inline]
460	pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> {
461	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
462	}
463
464	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
465	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
466	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
467	///
468	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
469	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
470	///
471	/// TODO: Support auto-RTL base direction
472	#[cfg_attr(feature = "flame_it", flamer::flame)]
473	pub fn new_with_data_source<'a, D: BidiDataSource>(
474	data_source: &D,
475	text: &'a str,
476	default_para_level: Option<Level>,
477	) -> BidiInfo<'a> {
478	let InitialInfoExt { base, pure_ltr, .. } =
479	InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
480
481	let mut levels = Vec::<Level>::with_capacity(text.len());
482	let mut processing_classes = base.original_classes.clone();
483
484	for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
485	let text = &text[para.range.clone()];
486	let original_classes = &base.original_classes[para.range.clone()];
487
488	compute_bidi_info_for_para(
489	data_source,
490	para,
491	*is_pure_ltr,
492	text,
493	original_classes,
494	&mut processing_classes,
495	&mut levels,
496	);
497	}
498
499	BidiInfo {
500	text,
501	original_classes: base.original_classes,
502	paragraphs: base.paragraphs,
503	levels,
504	}
505	}
506
507	/// Produce the levels for this paragraph as needed for reordering, one level per byte
508	/// in the paragraph. The returned vector includes bytes that are not included
509	/// in the `line`, but will not adjust them.
510	///
511	/// This runs [Rule L1], you can run
512	/// [Rule L2] by calling [`Self::reorder_visual()`].
513	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
514	/// to avoid non-byte indices.
515	///
516	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
517	///
518	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
519	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
520	#[cfg_attr(feature = "flame_it", flamer::flame)]
521	pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
522	assert!(line.start <= self.levels.len());
523	assert!(line.end <= self.levels.len());
524
525	let mut levels = self.levels.clone();
526	let line_classes = &self.original_classes[line.clone()];
527	let line_levels = &mut levels[line.clone()];
528
529	reorder_levels(
530	line_classes,
531	line_levels,
532	self.text.subrange(line),
533	para.level,
534	);
535
536	levels
537	}
538
539	/// Produce the levels for this paragraph as needed for reordering, one level per character
540	/// in the paragraph. The returned vector includes characters that are not included
541	/// in the `line`, but will not adjust them.
542	///
543	/// This runs [Rule L1], you can run
544	/// [Rule L2] by calling [`Self::reorder_visual()`].
545	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
546	/// to avoid non-byte indices.
547	///
548	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
549	///
550	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
551	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
552	#[cfg_attr(feature = "flame_it", flamer::flame)]
553	pub fn reordered_levels_per_char(
554	&self,
555	para: &ParagraphInfo,
556	line: Range<usize>,
557	) -> Vec<Level> {
558	let levels = self.reordered_levels(para, line);
559	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
560	}
561
562	/// Re-order a line based on resolved levels and return the line in display order.
563	///
564	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
565	///
566	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
567	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
568	#[cfg_attr(feature = "flame_it", flamer::flame)]
569	pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
570	if !level::has_rtl(&self.levels[line.clone()]) {
571	return self.text[line].into();
572	}
573	let (levels, runs) = self.visual_runs(para, line.clone());
574	reorder_line(self.text, line, levels, runs)
575	}
576
577	/// Reorders pre-calculated levels of a sequence of characters.
578	///
579	/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
580	/// intended to be used when an application has determined the levels of the objects (character sequences)
581	/// and just needs to have them reordered.
582	///
583	/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
584	///
585	/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
586	/// information about the actual text.
587	///
588	/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
589	/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
590	/// is for a single code point.
591	///
592	///
593	/// # # Example
594	/// ```
595	/// use unicode_bidi::BidiInfo;
596	/// use unicode_bidi::Level;
597	///
598	/// let l0 = Level::from(`0`);
599	/// let l1 = Level::from(`1`);
600	/// let l2 = Level::from(`2`);
601	///
602	/// let levels = vec![l0, l0, l0, l0];
603	/// let index_map = BidiInfo::reorder_visual(&levels);
604	/// assert_eq!(levels.len(), index_map.len());
605	/// assert_eq!(index_map, [`0`, `1`, `2`, `3`]);
606	///
607	/// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
608	/// let index_map = BidiInfo::reorder_visual(&levels);
609	/// assert_eq!(levels.len(), index_map.len());
610	/// assert_eq!(index_map, [`0`, `1`, `2`, `6`, `7`, `5`, `4`, `3`]);
611	/// ```
612	#[cfg_attr(feature = "flame_it", flamer::flame)]
613	#[inline]
614	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
615	reorder_visual(levels)
616	}
617
618	/// Find the level runs within a line and return them in visual order.
619	///
620	/// `line` is a range of bytes indices within `levels`.
621	///
622	/// The first return value is a vector of levels used by the reordering algorithm,
623	/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
624	/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
625	/// same level) should be displayed. Within each run, the display order can be checked
626	/// against the Level vector.
627	///
628	/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
629	/// as that should be handled by the engine using this API.
630	///
631	/// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
632	/// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
633	/// of producing a level map, since one may wish to deal with the fact that this is operating on
634	/// byte rather than character indices.
635	///
636	/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
637	///
638	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
639	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
640	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
641	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
642	#[cfg_attr(feature = "flame_it", flamer::flame)]
643	#[inline]
644	pub fn visual_runs(
645	&self,
646	para: &ParagraphInfo,
647	line: Range<usize>,
648	) -> (Vec<Level>, Vec<LevelRun>) {
649	let levels = self.reordered_levels(para, line.clone());
650	visual_runs_for_line(levels, &line)
651	}
652
653	/// If processed text has any computed RTL levels
654	///
655	/// This information is usually used to skip re-ordering of text when no RTL level is present
656	#[inline]
657	pub fn has_rtl(&self) -> bool {
658	level::has_rtl(&self.levels)
659	}
660	}
661
/// Bidi information of text treated as a single paragraph.
///
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
/// character is multiple bytes wide, then its class and level will appear multiple times in these
/// vectors.
#[derive(Debug, PartialEq)]
pub struct ParagraphBidiInfo<'text> {
    /// The text
    pub text: &'text str,

    /// The BidiClass of the character at each byte in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each byte in the text.
    pub levels: Vec<Level>,

    /// The paragraph embedding level.
    pub paragraph_level: Level,

    /// Whether the paragraph is purely LTR.
    ///
    /// This is the flag produced by the initial analysis pass; `has_rtl()` returns
    /// its negation.
    pub is_pure_ltr: bool,
}
684
685	impl<'text> ParagraphBidiInfo<'text> {
686	/// Determine the bidi embedding level.
687	///
688	///
689	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
690	///
691	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
692	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
693	///
694	/// TODO: Support auto-RTL base direction
695	#[cfg_attr(feature = "flame_it", flamer::flame)]
696	#[cfg(feature = "hardcoded-data")]
697	#[inline]
698	pub fn new(text: &str, default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
699	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
700	}
701
702	/// Determine the bidi embedding level, with a custom [`BidiDataSource`]
703	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
704	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
705	///
706	/// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
707	/// and should be kept in sync with it.
708	#[cfg_attr(feature = "flame_it", flamer::flame)]
709	pub fn new_with_data_source<'a, D: BidiDataSource>(
710	data_source: &D,
711	text: &'a str,
712	default_para_level: Option<Level>,
713	) -> ParagraphBidiInfo<'a> {
714	// Here we could create a ParagraphInitialInfo struct to parallel the one
715	// used by BidiInfo, but there doesn't seem any compelling reason for it.
716	let (original_classes, paragraph_level, is_pure_ltr) =
717	compute_initial_info(data_source, text, default_para_level, None);
718
719	let mut levels = Vec::<Level>::with_capacity(text.len());
720	let mut processing_classes = original_classes.clone();
721
722	let para_info = ParagraphInfo {
723	range: Range {
724	start: `0`,
725	end: text.len(),
726	},
727	level: paragraph_level,
728	};
729
730	compute_bidi_info_for_para(
731	data_source,
732	&para_info,
733	is_pure_ltr,
734	text,
735	&original_classes,
736	&mut processing_classes,
737	&mut levels,
738	);
739
740	ParagraphBidiInfo {
741	text,
742	original_classes,
743	levels,
744	paragraph_level,
745	is_pure_ltr,
746	}
747	}
748
749	/// Produce the levels for this paragraph as needed for reordering, one level per byte
750	/// in the paragraph. The returned vector includes bytes that are not included
751	/// in the `line`, but will not adjust them.
752	///
753	/// See BidiInfo::reordered_levels for details.
754	///
755	/// (This should be kept in sync with BidiInfo::reordered_levels.)
756	#[cfg_attr(feature = "flame_it", flamer::flame)]
757	pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
758	assert!(line.start <= self.levels.len());
759	assert!(line.end <= self.levels.len());
760
761	let mut levels = self.levels.clone();
762	let line_classes = &self.original_classes[line.clone()];
763	let line_levels = &mut levels[line.clone()];
764
765	reorder_levels(
766	line_classes,
767	line_levels,
768	self.text.subrange(line),
769	self.paragraph_level,
770	);
771
772	levels
773	}
774
775	/// Produce the levels for this paragraph as needed for reordering, one level per character
776	/// in the paragraph. The returned vector includes characters that are not included
777	/// in the `line`, but will not adjust them.
778	///
779	/// See BidiInfo::reordered_levels_per_char for details.
780	///
781	/// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
782	#[cfg_attr(feature = "flame_it", flamer::flame)]
783	pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
784	let levels = self.reordered_levels(line);
785	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
786	}
787
788	/// Re-order a line based on resolved levels and return the line in display order.
789	///
790	/// See BidiInfo::reorder_line for details.
791	///
792	/// (This should be kept in sync with BidiInfo::reorder_line.)
793	#[cfg_attr(feature = "flame_it", flamer::flame)]
794	pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, str> {
795	if !level::has_rtl(&self.levels[line.clone()]) {
796	return self.text[line].into();
797	}
798
799	let (levels, runs) = self.visual_runs(line.clone());
800
801	reorder_line(self.text, line, levels, runs)
802	}
803
804	/// Reorders pre-calculated levels of a sequence of characters.
805	///
806	/// See BidiInfo::reorder_visual for details.
807	#[cfg_attr(feature = "flame_it", flamer::flame)]
808	#[inline]
809	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
810	reorder_visual(levels)
811	}
812
813	/// Find the level runs within a line and return them in visual order.
814	///
815	/// `line` is a range of bytes indices within `levels`.
816	///
817	/// See BidiInfo::visual_runs for details.
818	///
819	/// (This should be kept in sync with BidiInfo::visual_runs.)
820	#[cfg_attr(feature = "flame_it", flamer::flame)]
821	#[inline]
822	pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
823	let levels = self.reordered_levels(line.clone());
824	visual_runs_for_line(levels, &line)
825	}
826
827	/// If processed text has any computed RTL levels
828	///
829	/// This information is usually used to skip re-ordering of text when no RTL level is present
830	#[inline]
831	pub fn has_rtl(&self) -> bool {
832	!self.is_pure_ltr
833	}
834
835	/// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
836	#[inline]
837	pub fn direction(&self) -> Direction {
838	para_direction(&self.levels)
839	}
840	}
841
842	/// Return a line of the text in display order based on resolved levels.
843	///
844	/// `text` the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
845	/// `line` a range of byte indices within `text` corresponding to one line
846	/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
847	/// `runs` array of `LevelRun`s in visual order
848	///
849	/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
850	/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
851	///
852	/// Returns: the reordered text of the line.
853	///
854	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
855	///
856	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
857	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
858	fn reorder_line<'text>(
859	text: &'text str,
860	line: Range<usize>,
861	levels: Vec<Level>,
862	runs: Vec<LevelRun>,
863	) -> Cow<'text, str> {
864	// If all isolating run sequences are LTR, no reordering is needed
865	if runs.iter().all(\|run: &Range\| levels[run.start].is_ltr()) {
866	return text[line].into();
867	}
868
869	let mut result: String = String::with_capacity(line.len());
870	for run: Range in runs {
871	if levels[run.start].is_rtl() {
872	result.extend(iter:text[run].chars().rev());
873	} else {
874	result.push_str(&text[run]);
875	}
876	}
877	result.into()
878	}
879
880	/// Find the level runs within a line and return them in visual order.
881	///
882	/// `line` is a range of code-unit indices within `levels`.
883	///
884	/// The first return value is a vector of levels used by the reordering algorithm,
885	/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
886	/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
887	/// same level) should be displayed. Within each run, the display order can be checked
888	/// against the Level vector.
889	///
890	/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
891	/// as that should be handled by the engine using this API.
892	///
893	/// Conceptually, this is the same as running [`reordered_levels()`] followed by
894	/// [`reorder_visual()`], however it returns the result as a list of level runs instead
895	/// of producing a level map, since one may wish to deal with the fact that this is operating on
896	/// byte rather than character indices.
897	///
898	/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
899	///
900	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
901	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
902	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
903	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
904	fn visual_runs_for_line(levels: Vec<Level>, line: &Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
905	// Find consecutive level runs.
906	let mut runs = Vec::new();
907	let mut start = line.start;
908	let mut run_level = levels[start];
909	let mut min_level = run_level;
910	let mut max_level = run_level;
911
912	for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + `1`) {
913	if new_level != run_level {
914	// End of the previous run, start of a new one.
915	runs.push(start..i);
916	start = i;
917	run_level = new_level;
918	min_level = cmp::min(run_level, min_level);
919	max_level = cmp::max(run_level, max_level);
920	}
921	}
922	runs.push(start..line.end);
923
924	let run_count = runs.len();
925
926	// Re-order the odd runs.
927	// <http://www.unicode.org/reports/tr9/#L2>
928
929	// Stop at the lowest odd* level.*
930	min_level = min_level.new_lowest_ge_rtl().expect("Level error");
931	// This loop goes through contiguous chunks of level runs that have a level
932	// ≥ max_level and reverses their contents, reducing max_level by 1 each time.
933	while max_level >= min_level {
934	// Look for the start of a sequence of consecutive runs of max_level or higher.
935	let mut seq_start = `0`;
936	while seq_start < run_count {
937	if levels[runs[seq_start].start] < max_level {
938	seq_start += `1`;
939	continue;
940	}
941
942	// Found the start of a sequence. Now find the end.
943	let mut seq_end = seq_start + `1`;
944	while seq_end < run_count {
945	if levels[runs[seq_end].start] < max_level {
946	break;
947	}
948	seq_end += `1`;
949	}
950	// Reverse the runs within this sequence.
951	runs[seq_start..seq_end].reverse();
952
953	seq_start = seq_end;
954	}
955	max_level
956	.lower(`1`)
957	.expect("Lowering embedding level below zero");
958	}
959	(levels, runs)
960	}
961
962	/// Reorders pre-calculated levels of a sequence of characters.
963	///
964	/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
965	/// intended to be used when an application has determined the levels of the objects (character sequences)
966	/// and just needs to have them reordered.
967	///
968	/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
969	///
970	/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
971	/// information about the actual text.
972	///
973	/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
974	/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
975	/// is for a single code point.
976	fn reorder_visual(levels: &[Level]) -> Vec<usize> {
977	// Gets the next range of characters after start_index with a level greater
978	// than or equal to `max`
979	fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> {
980	if levels.is_empty() \|\| start_index >= levels.len() {
981	return start_index..start_index;
982	}
983	while let Some(l) = levels.get(start_index) {
984	if *l >= max {
985	break;
986	}
987	start_index += `1`;
988	}
989
990	if levels.get(start_index).is_none() {
991	// If at the end of the array, adding one will
992	// produce an out-of-range end element
993	return start_index..start_index;
994	}
995
996	let mut end_index = start_index + `1`;
997	while let Some(l) = levels.get(end_index) {
998	if *l < max {
999	return start_index..end_index;
1000	}
1001	end_index += `1`;
1002	}
1003
1004	start_index..end_index
1005	}
1006
1007	// This implementation is similar to the L2 implementation in `visual_runs()`
1008	// but it cannot benefit from a precalculated LevelRun vector so needs to be different.
1009
1010	if levels.is_empty() {
1011	return vec![];
1012	}
1013
1014	// Get the min and max levels
1015	let (mut min, mut max) = levels
1016	.iter()
1017	.fold((levels[`0`], levels[`0`]), \|(min, max), &l\| {
1018	(cmp::min(min, l), cmp::max(max, l))
1019	});
1020
1021	// Initialize an index map
1022	let mut result: Vec<usize> = (`0`..levels.len()).collect();
1023
1024	if min == max && min.is_ltr() {
1025	// Everything is LTR and at the same level, do nothing
1026	return result;
1027	}
1028
1029	// Stop at the lowest odd* level, since everything below that*
1030	// is LTR and does not need further reordering
1031	min = min.new_lowest_ge_rtl().expect("Level error");
1032
1033	// For each max level, take all contiguous chunks of
1034	// levels ≥ max and reverse them
1035	//
1036	// We can do this check with the original levels instead of checking reorderings because all
1037	// prior reorderings will have been for contiguous chunks of levels >> max, which will
1038	// be a subset of these chunks anyway.
1039	while min <= max {
1040	let mut range = `0`..`0`;
1041	loop {
1042	range = next_range(levels, range.end, max);
1043	result[range.clone()].reverse();
1044
1045	if range.end >= levels.len() {
1046	break;
1047	}
1048	}
1049
1050	max.lower(`1`).expect("Level error");
1051	}
1052
1053	result
1054	}
1055
/// The core of BidiInfo initialization, factored out into a function that both
/// the utf-8 and utf-16 versions of BidiInfo can use.
///
/// Extends `levels` by `para.range.len()` entries set to `para.level`. Unless the paragraph is
/// at `LTR_LEVEL` and `is_pure_ltr` is set, it then resolves the entries at `levels[para.range]`
/// by running the `explicit`, `prepare` and `implicit` passes in order, and finally assigns
/// levels to the characters removed by rule X9.
///
/// `processing_classes` is narrowed to `para.range` here, whereas `text` and
/// `original_classes` are handed to the passes exactly as received.
/// NOTE(review): presumably the caller has already narrowed `text` and `original_classes`
/// to this paragraph — confirm against the call sites.
fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
    data_source: &D,
    para: &ParagraphInfo,
    is_pure_ltr: bool,
    text: &'a T,
    original_classes: &[BidiClass],
    processing_classes: &mut [BidiClass],
    levels: &mut Vec<Level>,
) {
    // Make room for this paragraph; every new entry starts at the paragraph level.
    let new_len = levels.len() + para.range.len();
    levels.resize(new_len, para.level);
    // Shortcut: the entries just added keep the paragraph level and nothing else runs.
    if para.level == LTR_LEVEL && is_pure_ltr {
        return;
    }

    // From here on only this paragraph's window is touched (the names shadow the arguments).
    let processing_classes = &mut processing_classes[para.range.clone()];
    let levels = &mut levels[para.range.clone()];

    explicit::compute(
        text,
        para.level,
        original_classes,
        levels,
        processing_classes,
    );

    // Weak and neutral types are resolved one isolating run sequence at a time.
    let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels);
    for sequence in &sequences {
        implicit::resolve_weak(text, sequence, processing_classes);
        implicit::resolve_neutral(
            text,
            data_source,
            sequence,
            levels,
            original_classes,
            processing_classes,
        );
    }
    implicit::resolve_levels(processing_classes, levels);

    assign_levels_to_removed_chars(para.level, original_classes, levels);
}
1100
1101	/// Produce the levels for this paragraph as needed for reordering, one level per code unit
1102	/// in the paragraph. The returned vector includes code units that are not included
1103	/// in the `line`, but will not adjust them.
1104	///
1105	/// This runs [Rule L1]
1106	///
1107	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
1108	fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
1109	line_classes: &[BidiClass],
1110	line_levels: &mut [Level],
1111	line_text: &'a T,
1112	para_level: Level,
1113	) {
1114	// Reset some whitespace chars to paragraph level.
1115	// <http://www.unicode.org/reports/tr9/#L1>
1116	let mut reset_from: Option<usize> = Some(`0`);
1117	let mut reset_to: Option<usize> = None;
1118	let mut prev_level = para_level;
1119	for (i, c) in line_text.char_indices() {
1120	match line_classes[i] {
1121	// Segment separator, Paragraph separator
1122	B \| S => {
1123	assert_eq!(reset_to, None);
1124	reset_to = Some(i + T::char_len(c));
1125	if reset_from == None {
1126	reset_from = Some(i);
1127	}
1128	}
1129	// Whitespace, isolate formatting
1130	WS \| FSI \| LRI \| RLI \| PDI => {
1131	if reset_from == None {
1132	reset_from = Some(i);
1133	}
1134	}
1135	// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
1136	// same as above + set the level
1137	RLE \| LRE \| RLO \| LRO \| PDF \| BN => {
1138	if reset_from == None {
1139	reset_from = Some(i);
1140	}
1141	// also set the level to previous
1142	line_levels[i] = prev_level;
1143	}
1144	_ => {
1145	reset_from = None;
1146	}
1147	}
1148	if let (Some(from), Some(to)) = (reset_from, reset_to) {
1149	for level in &mut line_levels[from..to] {
1150	*level = para_level;
1151	}
1152	reset_from = None;
1153	reset_to = None;
1154	}
1155	prev_level = line_levels[i];
1156	}
1157	if let Some(from) = reset_from {
1158	for level in &mut line_levels[from..] {
1159	*level = para_level;
1160	}
1161	}
1162	}
1163
/// Holds a reference to a `BidiInfo` and to one of its `paragraphs`.
///
/// It supports the paragraph operations that also need the `BidiInfo`, such as `direction`.
#[derive(Debug)]
pub struct Paragraph<'a, 'text> {
    /// The `BidiInfo` whose `levels` the paragraph operations read.
    pub info: &'a BidiInfo<'text>,
    /// The paragraph of interest; its `range` selects the entries of `info.levels`.
    pub para: &'a ParagraphInfo,
}
1172
1173	impl<'a, 'text> Paragraph<'a, 'text> {
1174	#[inline]
1175	pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
1176	Paragraph { info, para }
1177	}
1178
1179	/// Returns if the paragraph is Left direction, right direction or mixed.
1180	#[inline]
1181	pub fn direction(&self) -> Direction {
1182	para_direction(&self.info.levels[self.para.range.clone()])
1183	}
1184
1185	/// Returns the `Level` of a certain character in the paragraph.
1186	#[inline]
1187	pub fn level_at(&self, pos: usize) -> Level {
1188	let actual_position: usize = self.para.range.start + pos;
1189	self.info.levels[actual_position]
1190	}
1191	}
1192
1193	/// Return the directionality of the paragraph (Left, Right or Mixed) from its levels.
1194	#[cfg_attr(feature = "flame_it", flamer::flame)]
1195	fn para_direction(levels: &[Level]) -> Direction {
1196	let mut ltr = `false`;
1197	let mut rtl = `false`;
1198	for level in levels {
1199	if level.is_ltr() {
1200	ltr = `true`;
1201	if rtl {
1202	return Direction::Mixed;
1203	}
1204	}
1205
1206	if level.is_rtl() {
1207	rtl = `true`;
1208	if ltr {
1209	return Direction::Mixed;
1210	}
1211	}
1212	}
1213
1214	if ltr {
1215	return Direction::Ltr;
1216	}
1217
1218	Direction::Rtl
1219	}
1220
1221	/// Assign levels to characters removed by rule X9.
1222	///
1223	/// The levels assigned to these characters are not specified by the algorithm. This function
1224	/// assigns each one the level of the previous character, to avoid breaking level runs.
1225	#[cfg_attr(feature = "flame_it", flamer::flame)]
1226	fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
1227	for i: usize in `0`..levels.len() {
1228	if prepare::removed_by_x9(class:classes[i]) {
1229	levels[i] = if i > `0` { levels[i - `1`] } else { para_level };
1230	}
1231	}
1232	}
1233
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm.
///
/// See rules P2 and P3.
///
/// The base direction is derived from the first character in the string with bidi character type
/// L, R, or AL. If the first such character has type L, `Direction::Ltr` is returned. If the first
/// such character has type R or AL, `Direction::Rtl` is returned.
///
/// If the string does not contain any character of these types (outside of embedded isolate runs),
/// then `Direction::Mixed` is returned (but should be considered as meaning "neutral" or "unknown",
/// not in fact mixed directions).
///
/// This is a lightweight function for use when only the base direction is needed and no further
/// bidi processing of the text is needed.
///
/// If the text contains paragraph separators, this function considers only the first paragraph.
///
/// Character classes are looked up in `HardcodedBidiData`; this function forwards to
/// `get_base_direction_with_data_source`.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    get_base_direction_with_data_source(&HardcodedBidiData, text)
}
1255
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm,
/// considering the full text if the first paragraph is all-neutral.
///
/// This is the same as `get_base_direction` except that it does not stop at the first block
/// separator, but just resets the embedding level and continues to look for a strongly-
/// directional character. So the result will be the base direction of the first paragraph
/// that is not purely neutral characters.
///
/// Character classes are looked up in `HardcodedBidiData`; this function forwards to
/// `get_base_direction_full_with_data_source`.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction_full<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    get_base_direction_full_with_data_source(&HardcodedBidiData, text)
}
1268
1269	#[inline]
1270	pub fn get_base_direction_with_data_source<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1271	data_source: &D,
1272	text: &'a T,
1273	) -> Direction {
1274	get_base_direction_impl(data_source, text, use_full_text:`false`)
1275	}
1276
1277	#[inline]
1278	pub fn get_base_direction_full_with_data_source<
1279	'a,
1280	D: BidiDataSource,
1281	T: TextSource<'a> + ?Sized,
1282	>(
1283	data_source: &D,
1284	text: &'a T,
1285	) -> Direction {
1286	get_base_direction_impl(data_source, text, use_full_text:`true`)
1287	}
1288
1289	fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1290	data_source: &D,
1291	text: &'a T,
1292	use_full_text: bool,
1293	) -> Direction {
1294	let mut isolate_level: i32 = `0`;
1295	for c: char in text.chars() {
1296	match data_source.bidi_class(c) {
1297	LRI \| RLI \| FSI => isolate_level = isolate_level + `1`,
1298	PDI if isolate_level > `0` => isolate_level = isolate_level - `1`,
1299	L if isolate_level == `0` => return Direction::Ltr,
1300	R \| AL if isolate_level == `0` => return Direction::Rtl,
1301	B if !use_full_text => break,
1302	B if use_full_text => isolate_level = `0`,
1303	_ => (),
1304	}
1305	}
1306	// If no strong char was found, return Mixed. Normally this will be treated as Ltr by callers
1307	// (see rule P3), but we don't map this to Ltr here so that a caller that wants to apply other
1308	// heuristics to an all-neutral paragraph can tell the difference.
1309	Direction::Mixed
1310	}
1311
1312	/// Implementation of TextSource for UTF-8 text (a string slice).
1313	impl<'text> TextSource<'text> for str {
1314	type CharIter = core::str::Chars<'text>;
1315	type CharIndexIter = core::str::CharIndices<'text>;
1316	type IndexLenIter = Utf8IndexLenIter<'text>;
1317
1318	#[inline]
1319	fn len(&self) -> usize {
1320	(self as &str).len()
1321	}
1322	#[inline]
1323	fn char_at(&self, index: usize) -> Option<(char, usize)> {
1324	if let Some(slice) = self.get(index..) {
1325	if let Some(ch) = slice.chars().next() {
1326	return Some((ch, ch.len_utf8()));
1327	}
1328	}
1329	None
1330	}
1331	#[inline]
1332	fn subrange(&self, range: Range<usize>) -> &Self {
1333	&(self as &str)[range]
1334	}
1335	#[inline]
1336	fn chars(&'text self) -> Self::CharIter {
1337	(self as &str).chars()
1338	}
1339	#[inline]
1340	fn char_indices(&'text self) -> Self::CharIndexIter {
1341	(self as &str).char_indices()
1342	}
1343	#[inline]
1344	fn indices_lengths(&'text self) -> Self::IndexLenIter {
1345	Utf8IndexLenIter::new(&self)
1346	}
1347	#[inline]
1348	fn char_len(ch: char) -> usize {
1349	ch.len_utf8()
1350	}
1351	}
1352
/// Iterator over (UTF-8) string slices returning (index, char_len) tuple.
///
/// `index` is the byte offset at which a character starts and `char_len` is the number of
/// bytes of its UTF-8 encoding.
#[derive(Debug)]
pub struct Utf8IndexLenIter<'text> {
    iter: CharIndices<'text>,
}

impl<'text> Utf8IndexLenIter<'text> {
    /// Creates an iterator over the characters of `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        Utf8IndexLenIter {
            iter: text.char_indices(),
        }
    }
}

impl Iterator for Utf8IndexLenIter<'_> {
    type Item = (usize, usize);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|(pos, ch)| (pos, ch.len_utf8()))
    }
}
1379
/// Test helper: encodes `s` as a vector of UTF-16 code units.
#[cfg(test)]
fn to_utf16(s: &str) -> Vec<u16> {
    s.encode_utf16().collect::<Vec<u16>>()
}
1384
1385	#[cfg(test)]
1386	#[cfg(feature = "hardcoded-data")]
1387	mod tests {
1388	use super::*;
1389
1390	use utf16::{
1391	BidiInfo as BidiInfoU16, InitialInfo as InitialInfoU16, Paragraph as ParagraphU16,
1392	ParagraphBidiInfo as ParagraphBidiInfoU16,
1393	};
1394
/// Checks `char_at` on UTF-16 text containing a surrogate pair and unpaired surrogates.
#[test]
fn test_utf16_text_source() {
    let text: &[u16] =
        &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
    assert_eq!(text.char_at(0), Some(('A', 1)));
    assert_eq!(text.char_at(1), Some(('\u{10401}', 2)));
    // Index 2 is the second unit of the surrogate pair that starts at index 1.
    assert_eq!(text.char_at(2), None);
    assert_eq!(text.char_at(3), Some((' ', 1)));
    // Unpaired surrogates are reported as the replacement character, one unit long.
    assert_eq!(text.char_at(4), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(5), Some((' ', 1)));
    assert_eq!(text.char_at(6), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(7), Some((' ', 1)));
    assert_eq!(text.char_at(8), Some((char::REPLACEMENT_CHARACTER, 1)));
    assert_eq!(text.char_at(9), Some((char::REPLACEMENT_CHARACTER, 1)));
    // One past the end.
    assert_eq!(text.char_at(10), None);
}
1411
/// Checks `chars` on UTF-16 text containing a surrogate pair and unpaired surrogates.
#[test]
fn test_utf16_char_iter() {
    let text: &[u16] =
        &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
    assert_eq!(text.len(), 10);
    // Ten code units, but the surrogate pair decodes to a single character.
    assert_eq!(text.chars().count(), 9);
    let mut chars = text.chars();
    assert_eq!(chars.next(), Some('A'));
    assert_eq!(chars.next(), Some('\u{10401}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some(' '));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), Some('\u{FFFD}'));
    assert_eq!(chars.next(), None);
}
1430
/// Checks the classes and paragraphs computed by `InitialInfo::new`, for UTF-8 and UTF-16.
#[test]
fn test_initial_text_info() {
    let tests = vec![
        (
            // text
            "a1",
            // expected bidi classes per utf-8 byte
            vec![L, EN],
            // expected paragraph-info for utf-8
            vec![ParagraphInfo {
                range: 0..2,
                level: LTR_LEVEL,
            }],
            // expected bidi classes per utf-16 code unit
            vec![L, EN],
            // expected paragraph-info for utf-16
            vec![ParagraphInfo {
                range: 0..2,
                level: LTR_LEVEL,
            }],
        ),
        (
            // Arabic, space, Hebrew
            "\u{0639} \u{05D0}",
            vec![AL, AL, WS, R, R],
            vec![ParagraphInfo {
                range: 0..5,
                level: RTL_LEVEL,
            }],
            vec![AL, WS, R],
            vec![ParagraphInfo {
                range: 0..3,
                level: RTL_LEVEL,
            }],
        ),
        (
            // SMP characters from Kharoshthi, Cuneiform, Adlam:
            "\u{10A00}\u{12000}\u{1E900}",
            vec![R, R, R, R, L, L, L, L, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..12,
                level: RTL_LEVEL,
            }],
            vec![R, R, L, L, R, R],
            vec![ParagraphInfo {
                range: 0..6,
                level: RTL_LEVEL,
            }],
        ),
        (
            "a\u{2029}b",
            vec![L, B, B, B, L],
            vec![
                ParagraphInfo {
                    range: 0..4,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 4..5,
                    level: LTR_LEVEL,
                },
            ],
            vec![L, B, L],
            vec![
                ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 2..3,
                    level: LTR_LEVEL,
                },
            ],
        ),
        (
            "\u{2068}א\u{2069}a", // U+2068 FSI, U+2069 PDI
            vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
            vec![ParagraphInfo {
                range: 0..9,
                level: LTR_LEVEL,
            }],
            vec![RLI, R, PDI, L],
            vec![ParagraphInfo {
                range: 0..4,
                level: LTR_LEVEL,
            }],
        ),
    ];

    for t in tests {
        assert_eq!(
            InitialInfo::new(t.0, None),
            InitialInfo {
                text: t.0,
                original_classes: t.1,
                paragraphs: t.2,
            }
        );
        let text = &to_utf16(t.0);
        assert_eq!(
            InitialInfoU16::new(text, None),
            InitialInfoU16 {
                text,
                original_classes: t.3,
                paragraphs: t.4,
            }
        );
    }
}
1540
/// Checks the levels, classes and paragraphs computed by `BidiInfo::new` (and, for
/// single-paragraph inputs, by `ParagraphBidiInfo::new`), for UTF-8 and UTF-16.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_process_text() {
    let tests = vec![
        (
            // text
            "abc123",
            // base level
            Some(LTR_LEVEL),
            // levels
            Level::vec(&[0, 0, 0, 0, 0, 0]),
            // original_classes
            vec![L, L, L, EN, EN, EN],
            // paragraphs
            vec![ParagraphInfo {
                range: 0..6,
                level: LTR_LEVEL,
            }],
            // levels_u16
            Level::vec(&[0, 0, 0, 0, 0, 0]),
            // original_classes_u16
            vec![L, L, L, EN, EN, EN],
            // paragraphs_u16
            vec![ParagraphInfo {
                range: 0..6,
                level: LTR_LEVEL,
            }],
        ),
        (
            "abc \u{05D0}\u{05D1}\u{05D2}",
            Some(LTR_LEVEL),
            Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..10,
                level: LTR_LEVEL,
            }],
            Level::vec(&[0, 0, 0, 0, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "abc \u{05D0}\u{05D1}\u{05D2}",
            Some(RTL_LEVEL),
            Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R, R, R, R],
            vec![ParagraphInfo {
                range: 0..10,
                level: RTL_LEVEL,
            }],
            Level::vec(&[2, 2, 2, 1, 1, 1, 1]),
            vec![L, L, L, WS, R, R, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
        ),
        (
            "\u{05D0}\u{05D1}\u{05D2} abc",
            Some(LTR_LEVEL),
            Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
            vec![R, R, R, R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..10,
                level: LTR_LEVEL,
            }],
            Level::vec(&[1, 1, 1, 0, 0, 0, 0]),
            vec![R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "\u{05D0}\u{05D1}\u{05D2} abc",
            None,
            Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
            vec![R, R, R, R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..10,
                level: RTL_LEVEL,
            }],
            Level::vec(&[1, 1, 1, 1, 2, 2, 2]),
            vec![R, R, R, WS, L, L, L],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
        ),
        (
            "\u{063A}2\u{0638} \u{05D0}2\u{05D2}",
            Some(LTR_LEVEL),
            Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
            vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
            vec![ParagraphInfo {
                range: 0..11,
                level: LTR_LEVEL,
            }],
            Level::vec(&[1, 2, 1, 1, 1, 2, 1]),
            vec![AL, EN, AL, WS, R, EN, R],
            vec![ParagraphInfo {
                range: 0..7,
                level: LTR_LEVEL,
            }],
        ),
        (
            "a א.\nג",
            None,
            Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
            vec![L, WS, R, R, CS, B, R, R],
            vec![
                ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 6..8,
                    level: RTL_LEVEL,
                },
            ],
            Level::vec(&[0, 0, 1, 0, 0, 1]),
            vec![L, WS, R, CS, B, R],
            vec![
                ParagraphInfo {
                    range: 0..5,
                    level: LTR_LEVEL,
                },
                ParagraphInfo {
                    range: 5..6,
                    level: RTL_LEVEL,
                },
            ],
        ),
        // BidiTest:69635 (AL ET EN)
        (
            "\u{060B}\u{20CF}\u{06F9}",
            None,
            Level::vec(&[1, 1, 1, 1, 1, 2, 2]),
            vec![AL, AL, ET, ET, ET, EN, EN],
            vec![ParagraphInfo {
                range: 0..7,
                level: RTL_LEVEL,
            }],
            Level::vec(&[1, 1, 2]),
            vec![AL, ET, EN],
            vec![ParagraphInfo {
                range: 0..3,
                level: RTL_LEVEL,
            }],
        ),
    ];

    for t in tests {
        assert_eq!(
            BidiInfo::new(t.0, t.1),
            BidiInfo {
                text: t.0,
                levels: t.2.clone(),
                original_classes: t.3.clone(),
                paragraphs: t.4.clone(),
            }
        );
        // If it was a single paragraph, also test ParagraphBidiInfo.
        if t.4.len() == 1 {
            assert_eq!(
                ParagraphBidiInfo::new(t.0, t.1),
                ParagraphBidiInfo {
                    text: t.0,
                    original_classes: t.3,
                    levels: t.2.clone(),
                    paragraph_level: t.4[0].level,
                    is_pure_ltr: !level::has_rtl(&t.2),
                }
            )
        }
        let text = &to_utf16(t.0);
        assert_eq!(
            BidiInfoU16::new(text, t.1),
            BidiInfoU16 {
                text,
                levels: t.5.clone(),
                original_classes: t.6.clone(),
                paragraphs: t.7.clone(),
            }
        );
        if t.7.len() == 1 {
            assert_eq!(
                ParagraphBidiInfoU16::new(text, t.1),
                ParagraphBidiInfoU16 {
                    text,
                    original_classes: t.6.clone(),
                    levels: t.5.clone(),
                    paragraph_level: t.7[0].level,
                    is_pure_ltr: !level::has_rtl(&t.5),
                }
            )
        }
    }
}
1743
/// Checks `ParagraphBidiInfo::new` on text with an embedded paragraph break or tab.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_paragraph_bidi_info() {
    // Passing text that includes a paragraph break to the ParagraphBidiInfo API:
    // this is a misuse of the API by the client, but our behavior is safe &
    // consistent. The embedded paragraph break acts like a separator (tab) would.
    let tests = vec![
        (
            "a א.\nג",
            None,
            // utf-8 results:
            vec![L, WS, R, R, CS, B, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            // utf-16 results:
            vec![L, WS, R, CS, B, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            // paragraph level; is_pure_ltr
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\nb.",
            None,
            // utf-8 results:
            vec![R, R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            // utf-16 results:
            vec![R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            // paragraph level; is_pure_ltr
            RTL_LEVEL,
            false,
        ),
        (
            "a א.\tג",
            None,
            // utf-8 results:
            vec![L, WS, R, R, CS, S, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            // utf-16 results:
            vec![L, WS, R, CS, S, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            // paragraph level; is_pure_ltr
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\tb.",
            None,
            // utf-8 results:
            vec![R, R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            // utf-16 results:
            vec![R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            // paragraph level; is_pure_ltr
            RTL_LEVEL,
            false,
        ),
    ];

    for t in tests {
        assert_eq!(
            ParagraphBidiInfo::new(t.0, t.1),
            ParagraphBidiInfo {
                text: t.0,
                original_classes: t.2,
                levels: t.3,
                paragraph_level: t.6,
                is_pure_ltr: t.7,
            }
        );
        let text = &to_utf16(t.0);
        assert_eq!(
            ParagraphBidiInfoU16::new(text, t.1),
            ParagraphBidiInfoU16 {
                text,
                original_classes: t.4,
                levels: t.5,
                paragraph_level: t.6,
                is_pure_ltr: t.7,
            }
        );
    }
}
1829
/// Checks `BidiInfo::has_rtl` for ASCII-only and Hebrew-containing text, UTF-8 and UTF-16.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_bidi_info_has_rtl() {
    let tests = vec![
        // ASCII only
        ("123", None, false),
        ("123", Some(LTR_LEVEL), false),
        ("123", Some(RTL_LEVEL), false),
        ("abc", None, false),
        ("abc", Some(LTR_LEVEL), false),
        ("abc", Some(RTL_LEVEL), false),
        ("abc 123", None, false),
        ("abc\n123", None, false),
        // With Hebrew
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(LTR_LEVEL), true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(RTL_LEVEL), true),
        ("abc \u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("abc\n\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} abc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\nabc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} 123", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\n123", None, true),
    ];

    for t in tests {
        assert_eq!(BidiInfo::new(t.0, t.1).has_rtl(), t.2);
        assert_eq!(BidiInfoU16::new(&to_utf16(t.0), t.1).has_rtl(), t.2);
    }
}
1860
/// Test helper: reorders each paragraph of `text` as a single line spanning the paragraph.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
    let bidi_info = BidiInfo::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reorder_line(para, para.range.clone()))
        .collect()
}
1870
/// Test helper: UTF-16 counterpart of `reorder_paras`.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras_u16(text: &[u16]) -> Vec<Cow<'_, [u16]>> {
    let bidi_info = BidiInfoU16::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reorder_line(para, para.range.clone()))
        .collect()
}
1880
/// Checks the display order produced per paragraph, for UTF-8 and UTF-16.
///
/// Each entry is `(text, expected reordered paragraphs)`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reorder_line() {
    let tests = vec![
        // Bidi_Class: L L L B L L L B L L L
        ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
        // Bidi_Class: L L EN B L L EN B L L EN
        ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
        // Bidi_Class: L L L B AL AL AL
        ("abc\nابج", vec!["abc\n", "جبا"]),
        // Bidi_Class: AL AL AL B L L L
        (
            "\u{0627}\u{0628}\u{062C}\nabc",
            vec!["\n\u{062C}\u{0628}\u{0627}", "abc"],
        ),
        ("1.-2", vec!["1.-2"]),
        ("1-.2", vec!["1-.2"]),
        ("abc אבג", vec!["abc גבא"]),
        // Numbers being weak LTR characters, cannot reorder strong RTL
        ("123 \u{05D0}\u{05D1}\u{05D2}", vec!["גבא 123"]),
        ("abc\u{202A}def", vec!["abc\u{202A}def"]),
        (
            "abc\u{202A}def\u{202C}ghi",
            vec!["abc\u{202A}def\u{202C}ghi"],
        ),
        (
            "abc\u{2066}def\u{2069}ghi",
            vec!["abc\u{2066}def\u{2069}ghi"],
        ),
        // Testing for RLE Character
        ("\u{202B}abc אבג\u{202C}", vec!["\u{202b}גבא abc\u{202c}"]),
        // Testing neutral characters
        ("\u{05D0}בג? אבג", vec!["גבא ?גבא"]),
        // Testing neutral characters with special case
        ("A אבג?", vec!["A גבא?"]),
        // Testing neutral characters with Implicit RTL Marker
        ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
        ("\u{05D0}בג abc", vec!["abc גבא"]),
        ("abc\u{2067}.-\u{2069}ghi", vec!["abc\u{2067}-.\u{2069}ghi"]),
        (
            "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
        ),
        // With mirrorable characters in RTL run
        ("\u{05D0}(ב)ג.", vec![".ג)ב(א"]),
        // With mirrorable characters on level boundary
        ("\u{05D0}ב(גד[&ef].)gh", vec!["gh).]ef&[דג(בא"]),
    ];

    for t in tests {
        assert_eq!(reorder_paras(t.0), t.1);
        let expect_utf16 = t.1.iter().map(|v| to_utf16(v)).collect::<Vec<_>>();
        assert_eq!(reorder_paras_u16(&to_utf16(t.0)), expect_utf16);
    }
}
1936
/// Returns, for every paragraph of `text`, the result of
/// `BidiInfo::reordered_levels` over that paragraph's whole range.
///
/// Gated on `hardcoded-data` like the sibling `reorder_paras` helper, since it
/// relies on `BidiInfo::new`.
#[cfg(feature = "hardcoded-data")]
fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
    let bidi_info = BidiInfo::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
        .collect()
}
1945
/// Returns, for every paragraph of `text`, the result of
/// `BidiInfo::reordered_levels_per_char` over that paragraph's whole range.
///
/// Gated on `hardcoded-data` like the sibling `reorder_paras` helper, since it
/// relies on `BidiInfo::new`.
#[cfg(feature = "hardcoded-data")]
fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
    let bidi_info = BidiInfo::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
        .collect()
}
1954
/// UTF-16 variant: returns, for every paragraph of `text`, the result of
/// `BidiInfoU16::reordered_levels` over that paragraph's whole range.
///
/// Gated on `hardcoded-data` like the sibling `reorder_paras_u16` helper, since
/// it relies on `BidiInfoU16::new`.
#[cfg(feature = "hardcoded-data")]
fn reordered_levels_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
    let bidi_info = BidiInfoU16::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
        .collect()
}
1963
/// UTF-16 variant: returns, for every paragraph of `text`, the result of
/// `BidiInfoU16::reordered_levels_per_char` over that paragraph's whole range.
///
/// Gated on `hardcoded-data` like the sibling `reorder_paras_u16` helper, since
/// it relies on `BidiInfoU16::new`.
#[cfg(feature = "hardcoded-data")]
fn reordered_levels_per_char_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
    let bidi_info = BidiInfoU16::new(text, None);
    bidi_info
        .paragraphs
        .iter()
        .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
        .collect()
}
1972
/// Exercises `reordered_levels` and `reordered_levels_per_char` through the
/// UTF-8 and UTF-16 helpers, then checks `reordered_levels` on a line that
/// covers only part of a paragraph.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels() {
    // Each case is (text, UTF-8 levels, per-char levels, UTF-16 levels).
    // The per-char expectation is shared by both encodings.
    let numeric_cases = vec![
        // BidiTest:946 (LRI PDI)
        // NOTE(review): U+2067 is RLI rather than LRI; confirm which one the
        // referenced BidiTest line intends.
        (
            "\u{2067}\u{2069}",
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])],
            vec![Level::vec(&[0, 0])],
            vec![Level::vec(&[0, 0])],
        ),
        // BidiTest:69635 (AL ET EN)
        (
            "\u{060B}\u{20CF}\u{06F9}",
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])],
            vec![Level::vec(&[1, 1, 2])],
            vec![Level::vec(&[1, 1, 2])],
        ),
    ];

    for (input, utf8_levels, char_levels, utf16_levels) in numeric_cases {
        assert_eq!(reordered_levels_for_paras(input), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(input), char_levels);

        let input_utf16 = to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(&input_utf16), utf16_levels);
        assert_eq!(
            reordered_levels_per_char_for_paras_u16(&input_utf16),
            char_levels
        );
    }

    // Same tuple layout, with the expected levels written as strings.
    let string_cases = vec![
        // BidiTest:291284 (AN RLI PDF R)
        (
            "\u{0605}\u{2067}\u{202C}\u{0590}",
            vec![&["2", "2", "0", "0", "0", "x", "x", "x", "1", "1"]],
            vec![&["2", "0", "x", "1"]],
            vec![&["2", "0", "x", "1"]],
        ),
    ];

    for (input, utf8_levels, char_levels, utf16_levels) in string_cases {
        assert_eq!(reordered_levels_for_paras(input), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(input), char_levels);

        let input_utf16 = to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(&input_utf16), utf16_levels);
        assert_eq!(
            reordered_levels_per_char_for_paras_u16(&input_utf16),
            char_levels
        );
    }

    // A line covering only part of the paragraph.
    let mixed = "aa טֶ";
    let info = BidiInfo::new(mixed, None);
    assert_eq!(
        info.reordered_levels(&info.paragraphs[0], 3..7),
        Level::vec(&[0, 0, 0, 1, 1, 1, 1]),
    );

    let mixed_utf16 = to_utf16(mixed);
    let info = BidiInfoU16::new(&mixed_utf16, None);
    assert_eq!(
        info.reordered_levels(&info.paragraphs[0], 1..4),
        Level::vec(&[0, 0, 0, 1, 1]),
    );
}
2033
/// Checks the `len()` of each paragraph entry for a one-paragraph and a
/// two-paragraph text, through both the UTF-8 and UTF-16 APIs.
///
/// Gated on `hardcoded-data` like the other tests that call `BidiInfo::new`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_paragraph_info_len() {
    let text = "hello world";
    let bidi_info = BidiInfo::new(text, None);
    assert_eq!(bidi_info.paragraphs.len(), 1);
    assert_eq!(bidi_info.paragraphs[0].len(), text.len());

    let text2 = "How are you";
    let whole_text = format!("{}\n{}", text, text2);
    let bidi_info = BidiInfo::new(&whole_text, None);
    assert_eq!(bidi_info.paragraphs.len(), 2);

    // The first paragraph includes the paragraph separator.
    // TODO: investigate if the paragraph separator character
    // should not be part of any paragraph.
    assert_eq!(bidi_info.paragraphs[0].len(), text.len() + 1);
    assert_eq!(bidi_info.paragraphs[1].len(), text2.len());

    // Repeat with UTF-16 input; from here on the lengths are in UTF-16 code units.
    let text = &to_utf16(text);
    let bidi_info = BidiInfoU16::new(text, None);
    assert_eq!(bidi_info.paragraphs.len(), 1);
    assert_eq!(bidi_info.paragraphs[0].len(), text.len());

    let text2 = &to_utf16(text2);
    let whole_text = &to_utf16(&whole_text);
    // `whole_text` is already a reference, so it is passed as-is.
    let bidi_info = BidiInfoU16::new(whole_text, None);
    assert_eq!(bidi_info.paragraphs.len(), 2);

    assert_eq!(bidi_info.paragraphs[0].len(), text.len() + 1);
    assert_eq!(bidi_info.paragraphs[1].len(), text2.len());
}
2065
/// Checks `direction()` for an LTR-only, an RTL-only and a mixed paragraph,
/// through both the UTF-8 and UTF-16 APIs.
///
/// Gated on `hardcoded-data` like the other tests that call `BidiInfo::new`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_direction() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    // Three paragraphs: LTR only, RTL only, then LTR immediately followed by RTL.
    let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    let bidi_info = BidiInfo::new(&all_paragraphs, None);
    assert_eq!(bidi_info.paragraphs.len(), 3);
    let p_ltr = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
    let p_rtl = Paragraph::new(&bidi_info, &bidi_info.paragraphs[1]);
    let p_mixed = Paragraph::new(&bidi_info, &bidi_info.paragraphs[2]);
    assert_eq!(p_ltr.direction(), Direction::Ltr);
    assert_eq!(p_rtl.direction(), Direction::Rtl);
    assert_eq!(p_mixed.direction(), Direction::Mixed);

    // Same checks on the UTF-16 encoding of the text.
    let all_paragraphs = &to_utf16(&all_paragraphs);
    // `all_paragraphs` is already a reference, so it is passed as-is.
    let bidi_info = BidiInfoU16::new(all_paragraphs, None);
    assert_eq!(bidi_info.paragraphs.len(), 3);
    let p_ltr = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[0]);
    let p_rtl = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[1]);
    let p_mixed = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[2]);
    assert_eq!(p_ltr.direction(), Direction::Ltr);
    assert_eq!(p_rtl.direction(), Direction::Rtl);
    assert_eq!(p_mixed.direction(), Direction::Mixed);
}
2090
/// Edge cases for paragraph direction: empty text yields no paragraphs, and a
/// text consisting only of a paragraph separator takes its direction from the
/// default or explicitly supplied paragraph level.
///
/// Gated on `hardcoded-data` like the other tests that call `BidiInfo::new`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_edge_cases_direction() {
    // No paragraphs for empty text.
    let empty = "";
    let bidi_info = BidiInfo::new(empty, Some(RTL_LEVEL));
    assert_eq!(bidi_info.paragraphs.len(), 0);

    let empty = &to_utf16(empty);
    let bidi_info = BidiInfoU16::new(empty, Some(RTL_LEVEL));
    assert_eq!(bidi_info.paragraphs.len(), 0);

    // Each case is (text, default paragraph level, expected direction).
    let tests = vec![
        // The paragraph separator will take the value of the default direction
        // which is left to right.
        ("\n", None, Direction::Ltr),
        // The paragraph separator will take the value of the given initial direction
        // which is left to right.
        ("\n", Some(LTR_LEVEL), Direction::Ltr),
        // The paragraph separator will take the value of the given initial direction
        // which is right to left.
        ("\n", Some(RTL_LEVEL), Direction::Rtl),
    ];

    for (input, default_level, expected) in tests {
        let bidi_info = BidiInfo::new(input, default_level);
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), expected);

        let text = &to_utf16(input);
        let bidi_info = BidiInfoU16::new(text, default_level);
        // Mirror the UTF-8 check so a wrong paragraph count is reported as an
        // assertion failure rather than an index panic.
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), expected);
    }
}
2125
/// Checks `level_at()` at the start of an LTR, an RTL and a mixed paragraph,
/// and at the LTR/RTL boundary inside the mixed one, through both the UTF-8
/// and UTF-16 APIs.
///
/// Gated on `hardcoded-data` like the other tests that call `BidiInfo::new`.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_level_at() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    // Three paragraphs: LTR only, RTL only, then LTR immediately followed by RTL.
    let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    let bidi_info = BidiInfo::new(&all_paragraphs, None);
    assert_eq!(bidi_info.paragraphs.len(), 3);

    let p_ltr = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
    let p_rtl = Paragraph::new(&bidi_info, &bidi_info.paragraphs[1]);
    let p_mixed = Paragraph::new(&bidi_info, &bidi_info.paragraphs[2]);

    assert_eq!(p_ltr.level_at(0), LTR_LEVEL);
    assert_eq!(p_rtl.level_at(0), RTL_LEVEL);
    assert_eq!(p_mixed.level_at(0), LTR_LEVEL);
    // `info.levels` covers the whole text (54 bytes); the mixed paragraph
    // starts at byte 28.
    assert_eq!(p_mixed.info.levels.len(), 54);
    assert_eq!(p_mixed.para.range.start, 28);
    // The argument is relative to the paragraph: the first RTL character sits
    // right after `ltr_text`.
    assert_eq!(p_mixed.level_at(ltr_text.len()), RTL_LEVEL);

    // Same checks on the UTF-16 encoding of the text.
    let all_paragraphs = &to_utf16(&all_paragraphs);
    // `all_paragraphs` is already a reference, so it is passed as-is.
    let bidi_info = BidiInfoU16::new(all_paragraphs, None);
    assert_eq!(bidi_info.paragraphs.len(), 3);

    let p_ltr = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[0]);
    let p_rtl = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[1]);
    let p_mixed = ParagraphU16::new(&bidi_info, &bidi_info.paragraphs[2]);

    assert_eq!(p_ltr.level_at(0), LTR_LEVEL);
    assert_eq!(p_rtl.level_at(0), RTL_LEVEL);
    assert_eq!(p_mixed.level_at(0), LTR_LEVEL);
    // 40 UTF-16 code units in total; the mixed paragraph starts at unit 21.
    assert_eq!(p_mixed.info.levels.len(), 40);
    assert_eq!(p_mixed.para.range.start, 21);
    // `ltr_text` is ASCII, so its byte length equals its UTF-16 length.
    assert_eq!(p_mixed.level_at(ltr_text.len()), RTL_LEVEL);
}
2160
/// Checks `get_base_direction` on UTF-8 and UTF-16 encodings of the same texts.
#[test]
fn test_get_base_direction() {
    // Each case is (text, expected base direction).
    let cases = vec![
        ("", Direction::Mixed), // return Mixed if no strong character found
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        ("3.14\npi", Direction::Mixed), // only first paragraph is considered
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl), // embedded isolate is ignored
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
    ];

    for (input, expected) in cases {
        assert_eq!(get_base_direction(input), expected);

        let input_utf16 = to_utf16(input);
        assert_eq!(get_base_direction(input_utf16.as_slice()), expected);
    }
}
2179
/// Checks `get_base_direction_full` on UTF-8 and UTF-16 encodings of the same
/// texts, including multi-paragraph inputs.
#[test]
fn test_get_base_direction_full() {
    // Each case is (text, expected base direction).
    let cases = vec![
        ("", Direction::Mixed), // return Mixed if no strong character found
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        ("3.14\npi", Direction::Ltr), // direction taken from the second paragraph
        ("3.14\n\u{05D0}", Direction::Rtl), // direction taken from the second paragraph
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl), // embedded isolate is ignored
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
        ("[123 '\u{2066}abc\u{2068}'\n\u{0628}]", Direction::Rtl), // \n resets embedding level
    ];

    for (input, expected) in cases {
        assert_eq!(get_base_direction_full(input), expected);

        let input_utf16 = to_utf16(input);
        assert_eq!(get_base_direction_full(input_utf16.as_slice()), expected);
    }
}
2200	}
2201
/// Serde round-trip tests; compiled only for test builds with both the
/// `serde` and `hardcoded-data` features enabled.
#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// The resolved levels of a short mixed-direction text must map to a
    /// sequence of `Level` newtype structs, each wrapping a `u8`.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;
        assert_eq!(text.len(), 10);
        assert_eq!(levels.len(), 10);

        // One level per byte of the text: four at level 0, then six at level 1.
        let level_numbers: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        let mut expected = vec![Token::Seq { len: Some(10) }];
        for &number in level_numbers.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(number));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}
2243