utf16.rs source code [crates/unicode_bidi/src/utf16.rs]

1	// Copyright 2023 The Mozilla Foundation. See the
2	// COPYRIGHT file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	use super::TextSource;
11
12	use alloc::borrow::Cow;
13	use alloc::vec::Vec;
14	use core::char;
15	use core::ops::Range;
16
17	use crate::{
18	compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
19	reorder_visual, visual_runs_for_line,
20	};
21	use crate::{
22	BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
23	};
24
25	#[cfg(feature = "hardcoded-data")]
26	use crate::HardcodedBidiData;
27
28	/// Initial bidi information of the text (UTF-16 version).
29	///
30	/// Contains the text paragraphs and `BidiClass` of its characters.
31	#[derive(PartialEq, Debug)]
32	pub struct InitialInfo<'text> {
33	/// The text
34	pub text: &'text [u16],
35
36	/// The BidiClass of the character at each code unit in the text.
37	/// If a character is multiple code units, its class will appear multiple times in the vector.
38	pub original_classes: Vec<BidiClass>,
39
40	/// The boundaries and level of each paragraph within the text.
41	pub paragraphs: Vec<ParagraphInfo>,
42	}
43
44	impl<'text> InitialInfo<'text> {
45	/// Find the paragraphs and BidiClasses in a string of text.
46	///
47	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
48	///
49	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
50	/// character is found before the matching PDI. If no strong character is found, the class will
51	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
52	///
53	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
54	#[cfg_attr(feature = "flame_it", flamer::flame)]
55	#[cfg(feature = "hardcoded-data")]
56	pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
57	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
58	}
59
60	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
61	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
62	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
63	///
64	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
65	///
66	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
67	/// character is found before the matching PDI. If no strong character is found, the class will
68	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
69	#[cfg_attr(feature = "flame_it", flamer::flame)]
70	pub fn new_with_data_source<'a, D: BidiDataSource>(
71	data_source: &D,
72	text: &'a [u16],
73	default_para_level: Option<Level>,
74	) -> InitialInfo<'a> {
75	InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
76	}
77	}
78
79	/// Extended version of InitialInfo (not public API).
80	#[derive(PartialEq, Debug)]
81	struct InitialInfoExt<'text> {
82	/// The base InitialInfo for the text, recording its paragraphs and bidi classes.
83	base: InitialInfo<'text>,
84
85	/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
86	/// requires no further bidi processing (i.e. there are no RTL characters or bidi
87	/// control codes present).
88	flags: Vec<ParagraphInfoFlags>,
89	}
90
91	impl<'text> InitialInfoExt<'text> {
92	/// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
93	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
94	/// instead (enabled with tbe default `hardcoded-data` Cargo feature)
95	///
96	/// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
97	///
98	/// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
99	/// character is found before the matching PDI. If no strong character is found, the class will
100	/// remain FSI, and it's up to later stages to treat these as LRI when needed.
101	#[cfg_attr(feature = "flame_it", flamer::flame)]
102	pub fn new_with_data_source<'a, D: BidiDataSource>(
103	data_source: &D,
104	text: &'a [u16],
105	default_para_level: Option<Level>,
106	) -> InitialInfoExt<'a> {
107	let mut paragraphs = Vec::<ParagraphInfo>::new();
108	let mut flags = Vec::<ParagraphInfoFlags>::new();
109	let (original_classes, _, _, _) = compute_initial_info(
110	data_source,
111	text,
112	default_para_level,
113	Some((&mut paragraphs, &mut flags)),
114	);
115
116	InitialInfoExt {
117	base: InitialInfo {
118	text,
119	original_classes,
120	paragraphs,
121	},
122	flags,
123	}
124	}
125	}
126
127	/// Bidi information of the text (UTF-16 version).
128	///
129	/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
130	/// character is multiple code units wide, then its class and level will appear multiple times in these
131	/// vectors.
132	// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
133	#[derive(Debug, PartialEq)]
134	pub struct BidiInfo<'text> {
135	/// The text
136	pub text: &'text [u16],
137
138	/// The BidiClass of the character at each byte in the text.
139	pub original_classes: Vec<BidiClass>,
140
141	/// The directional embedding level of each byte in the text.
142	pub levels: Vec<Level>,
143
144	/// The boundaries and paragraph embedding level of each paragraph within the text.
145	///
146	/// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
147	/// Or just don't include the first paragraph, which always starts at 0?
148	pub paragraphs: Vec<ParagraphInfo>,
149	}
150
151	impl<'text> BidiInfo<'text> {
152	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
153	///
154	///
155	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
156	///
157	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
158	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
159	///
160	/// TODO: Support auto-RTL base direction
161	#[cfg_attr(feature = "flame_it", flamer::flame)]
162	#[cfg(feature = "hardcoded-data")]
163	#[inline]
164	pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
165	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
166	}
167
168	/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
169	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
170	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
171	///
172	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
173	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
174	///
175	/// TODO: Support auto-RTL base direction
176	#[cfg_attr(feature = "flame_it", flamer::flame)]
177	pub fn new_with_data_source<'a, D: BidiDataSource>(
178	data_source: &D,
179	text: &'a [u16],
180	default_para_level: Option<Level>,
181	) -> BidiInfo<'a> {
182	let InitialInfoExt { base, flags, .. } =
183	InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
184
185	let mut levels = Vec::<Level>::with_capacity(text.len());
186	let mut processing_classes = base.original_classes.clone();
187
188	for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
189	let text = &text[para.range.clone()];
190	let original_classes = &base.original_classes[para.range.clone()];
191
192	compute_bidi_info_for_para(
193	data_source,
194	para,
195	flags.is_pure_ltr,
196	flags.has_isolate_controls,
197	text,
198	original_classes,
199	&mut processing_classes,
200	&mut levels,
201	);
202	}
203
204	BidiInfo {
205	text,
206	original_classes: base.original_classes,
207	paragraphs: base.paragraphs,
208	levels,
209	}
210	}
211
212	/// Produce the levels for this paragraph as needed for reordering, one level per byte
213	/// in the paragraph. The returned vector includes bytes that are not included
214	/// in the `line`, but will not adjust them.
215	///
216	/// This runs [Rule L1], you can run
217	/// [Rule L2] by calling [`Self::reorder_visual()`].
218	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
219	/// to avoid non-byte indices.
220	///
221	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
222	///
223	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
224	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
225	#[cfg_attr(feature = "flame_it", flamer::flame)]
226	pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
227	assert!(line.start <= self.levels.len());
228	assert!(line.end <= self.levels.len());
229
230	let mut levels = self.levels.clone();
231	let line_classes = &self.original_classes[line.clone()];
232	let line_levels = &mut levels[line.clone()];
233	let line_str: &[u16] = &self.text[line.clone()];
234
235	reorder_levels(line_classes, line_levels, line_str, para.level);
236
237	levels
238	}
239
240	/// Produce the levels for this paragraph as needed for reordering, one level per character
241	/// in the paragraph. The returned vector includes characters that are not included
242	/// in the `line`, but will not adjust them.
243	///
244	/// This runs [Rule L1], you can run
245	/// [Rule L2] by calling [`Self::reorder_visual()`].
246	/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
247	/// to avoid non-byte indices.
248	///
249	/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
250	///
251	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
252	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
253	#[cfg_attr(feature = "flame_it", flamer::flame)]
254	pub fn reordered_levels_per_char(
255	&self,
256	para: &ParagraphInfo,
257	line: Range<usize>,
258	) -> Vec<Level> {
259	let levels = self.reordered_levels(para, line);
260	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
261	}
262
263	/// Re-order a line based on resolved levels and return the line in display order.
264	///
265	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
266	///
267	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
268	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
269	#[cfg_attr(feature = "flame_it", flamer::flame)]
270	pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
271	if !level::has_rtl(&self.levels[line.clone()]) {
272	return self.text[line].into();
273	}
274	let (levels, runs) = self.visual_runs(para, line.clone());
275	reorder_line(self.text, line, levels, runs)
276	}
277
278	/// Reorders pre-calculated levels of a sequence of characters.
279	///
280	/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
281	/// intended to be used when an application has determined the levels of the objects (character sequences)
282	/// and just needs to have them reordered.
283	///
284	/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
285	///
286	/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
287	/// information about the actual text.
288	///
289	/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
290	/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
291	/// is for a single code point.
292	///
293	///
294	/// # # Example
295	/// ```
296	/// use unicode_bidi::BidiInfo;
297	/// use unicode_bidi::Level;
298	///
299	/// let l0 = Level::from(`0`);
300	/// let l1 = Level::from(`1`);
301	/// let l2 = Level::from(`2`);
302	///
303	/// let levels = vec![l0, l0, l0, l0];
304	/// let index_map = BidiInfo::reorder_visual(&levels);
305	/// assert_eq!(levels.len(), index_map.len());
306	/// assert_eq!(index_map, [`0`, `1`, `2`, `3`]);
307	///
308	/// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
309	/// let index_map = BidiInfo::reorder_visual(&levels);
310	/// assert_eq!(levels.len(), index_map.len());
311	/// assert_eq!(index_map, [`0`, `1`, `2`, `6`, `7`, `5`, `4`, `3`]);
312	/// ```
313	#[cfg_attr(feature = "flame_it", flamer::flame)]
314	#[inline]
315	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
316	reorder_visual(levels)
317	}
318
319	/// Find the level runs within a line and return them in visual order.
320	///
321	/// `line` is a range of bytes indices within `levels`.
322	///
323	/// The first return value is a vector of levels used by the reordering algorithm,
324	/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
325	/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
326	/// same level) should be displayed. Within each run, the display order can be checked
327	/// against the Level vector.
328	///
329	/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
330	/// as that should be handled by the engine using this API.
331	///
332	/// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
333	/// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
334	/// of producing a level map, since one may wish to deal with the fact that this is operating on
335	/// byte rather than character indices.
336	///
337	/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
338	///
339	/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
340	/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
341	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
342	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
343	#[cfg_attr(feature = "flame_it", flamer::flame)]
344	#[inline]
345	pub fn visual_runs(
346	&self,
347	para: &ParagraphInfo,
348	line: Range<usize>,
349	) -> (Vec<Level>, Vec<LevelRun>) {
350	let levels = self.reordered_levels(para, line.clone());
351	visual_runs_for_line(levels, &line)
352	}
353
354	/// If processed text has any computed RTL levels
355	///
356	/// This information is usually used to skip re-ordering of text when no RTL level is present
357	#[inline]
358	pub fn has_rtl(&self) -> bool {
359	level::has_rtl(&self.levels)
360	}
361	}
362
363	/// Bidi information of text treated as a single paragraph.
364	///
365	/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
366	/// character is multiple code units wide, then its class and level will appear multiple times in these
367	/// vectors.
368	#[derive(Debug, PartialEq)]
369	pub struct ParagraphBidiInfo<'text> {
370	/// The text
371	pub text: &'text [u16],
372
373	/// The BidiClass of the character at each byte in the text.
374	pub original_classes: Vec<BidiClass>,
375
376	/// The directional embedding level of each byte in the text.
377	pub levels: Vec<Level>,
378
379	/// The paragraph embedding level.
380	pub paragraph_level: Level,
381
382	/// Whether the paragraph is purely LTR.
383	pub is_pure_ltr: bool,
384	}
385
386	impl<'text> ParagraphBidiInfo<'text> {
387	/// Determine the bidi embedding level.
388	///
389	///
390	/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
391	///
392	/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
393	/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
394	///
395	/// TODO: Support auto-RTL base direction
396	#[cfg_attr(feature = "flame_it", flamer::flame)]
397	#[cfg(feature = "hardcoded-data")]
398	#[inline]
399	pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
400	Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
401	}
402
403	/// Determine the bidi embedding level, with a custom [`BidiDataSource`]
404	/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
405	/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
406	///
407	/// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
408	/// and should be kept in sync with it.
409	#[cfg_attr(feature = "flame_it", flamer::flame)]
410	pub fn new_with_data_source<'a, D: BidiDataSource>(
411	data_source: &D,
412	text: &'a [u16],
413	default_para_level: Option<Level>,
414	) -> ParagraphBidiInfo<'a> {
415	// Here we could create a ParagraphInitialInfo struct to parallel the one
416	// used by BidiInfo, but there doesn't seem any compelling reason for it.
417	let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
418	compute_initial_info(data_source, text, default_para_level, None);
419
420	let mut levels = Vec::<Level>::with_capacity(text.len());
421	let mut processing_classes = original_classes.clone();
422
423	let para_info = ParagraphInfo {
424	range: Range {
425	start: `0`,
426	end: text.len(),
427	},
428	level: paragraph_level,
429	};
430
431	compute_bidi_info_for_para(
432	data_source,
433	&para_info,
434	is_pure_ltr,
435	has_isolate_controls,
436	text,
437	&original_classes,
438	&mut processing_classes,
439	&mut levels,
440	);
441
442	ParagraphBidiInfo {
443	text,
444	original_classes,
445	levels,
446	paragraph_level,
447	is_pure_ltr,
448	}
449	}
450
451	/// Produce the levels for this paragraph as needed for reordering, one level per code unit
452	/// in the paragraph. The returned vector includes code units that are not included
453	/// in the `line`, but will not adjust them.
454	///
455	/// See BidiInfo::reordered_levels for details.
456	///
457	/// (This should be kept in sync with BidiInfo::reordered_levels.)
458	#[cfg_attr(feature = "flame_it", flamer::flame)]
459	pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
460	assert!(line.start <= self.levels.len());
461	assert!(line.end <= self.levels.len());
462
463	let mut levels = self.levels.clone();
464	let line_classes = &self.original_classes[line.clone()];
465	let line_levels = &mut levels[line.clone()];
466
467	reorder_levels(
468	line_classes,
469	line_levels,
470	self.text.subrange(line),
471	self.paragraph_level,
472	);
473
474	levels
475	}
476
477	/// Produce the levels for this paragraph as needed for reordering, one level per character
478	/// in the paragraph. The returned vector includes characters that are not included
479	/// in the `line`, but will not adjust them.
480	///
481	/// See BidiInfo::reordered_levels_per_char for details.
482	///
483	/// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
484	#[cfg_attr(feature = "flame_it", flamer::flame)]
485	pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
486	let levels = self.reordered_levels(line);
487	self.text.char_indices().map(\|(i, _)\| levels[i]).collect()
488	}
489
490	/// Re-order a line based on resolved levels and return the line in display order.
491	///
492	/// See BidiInfo::reorder_line for details.
493	///
494	/// (This should be kept in sync with BidiInfo::reorder_line.)
495	#[cfg_attr(feature = "flame_it", flamer::flame)]
496	pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
497	if !level::has_rtl(&self.levels[line.clone()]) {
498	return self.text[line].into();
499	}
500	let (levels, runs) = self.visual_runs(line.clone());
501	reorder_line(self.text, line, levels, runs)
502	}
503
504	/// Reorders pre-calculated levels of a sequence of characters.
505	///
506	/// See BidiInfo::reorder_visual for details.
507	#[cfg_attr(feature = "flame_it", flamer::flame)]
508	#[inline]
509	pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
510	reorder_visual(levels)
511	}
512
513	/// Find the level runs within a line and return them in visual order.
514	///
515	/// `line` is a range of code-unit indices within `levels`.
516	///
517	/// See `BidiInfo::visual_runs` for details.
518	///
519	/// (This should be kept in sync with BidiInfo::visual_runs.)
520	#[cfg_attr(feature = "flame_it", flamer::flame)]
521	#[inline]
522	pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
523	let levels = self.reordered_levels(line.clone());
524	visual_runs_for_line(levels, &line)
525	}
526
527	/// If processed text has any computed RTL levels
528	///
529	/// This information is usually used to skip re-ordering of text when no RTL level is present
530	#[inline]
531	pub fn has_rtl(&self) -> bool {
532	!self.is_pure_ltr
533	}
534
535	/// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
536	#[inline]
537	pub fn direction(&self) -> Direction {
538	para_direction(&self.levels)
539	}
540	}
541
542	/// Return a line of the text in display order based on resolved levels.
543	///
544	/// `text` the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
545	/// `line` a range of byte indices within `text` corresponding to one line
546	/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
547	/// `runs` array of `LevelRun`s in visual order
548	///
549	/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
550	/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
551	///
552	/// Returns: the reordered text of the line.
553	///
554	/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
555	///
556	/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
557	/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
558	fn reorder_line(
559	text: &[u16],
560	line: Range<usize>,
561	levels: Vec<Level>,
562	runs: Vec<LevelRun>,
563	) -> Cow<'_, [u16]> {
564	// If all isolating run sequences are LTR, no reordering is needed
565	if runs.iter().all(\|run: &Range\| levels[run.start].is_ltr()) {
566	return text[line].into();
567	}
568
569	let mut result: Vec = Vec::<u16>::with_capacity(line.len());
570	for run: Range in runs {
571	if levels[run.start].is_rtl() {
572	let mut buf: [u16; 2] = [`0`; `2`];
573	for c: char in text[run].chars().rev() {
574	result.extend(c.encode_utf16(&mut buf).iter());
575	}
576	} else {
577	result.extend(text[run].iter());
578	}
579	}
580	result.into()
581	}
582
583	/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
584	/// And it supports all operation in the `Paragraph` that needs also its
585	/// `BidiInfo` such as `direction`.
586	#[derive(Debug)]
587	pub struct Paragraph<'a, 'text> {
588	pub info: &'a BidiInfo<'text>,
589	pub para: &'a ParagraphInfo,
590	}
591
592	impl<'a, 'text> Paragraph<'a, 'text> {
593	#[inline]
594	pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
595	Paragraph { info, para }
596	}
597
598	/// Returns if the paragraph is Left direction, right direction or mixed.
599	#[inline]
600	pub fn direction(&self) -> Direction {
601	para_direction(&self.info.levels[self.para.range.clone()])
602	}
603
604	/// Returns the `Level` of a certain character in the paragraph.
605	#[inline]
606	pub fn level_at(&self, pos: usize) -> Level {
607	let actual_position: usize = self.para.range.start + pos;
608	self.info.levels[actual_position]
609	}
610	}
611
// Implementation of TextSource for UTF-16 text in a [u16] array.
// Note that there could be unpaired surrogates present!
614
// Convenience functions to check whether a UTF16 code unit is a surrogate.

/// Returns `true` if `code` is a high (leading) surrogate, i.e. lies in `0xD800..=0xDBFF`.
#[inline]
fn is_high_surrogate(code: u16) -> bool {
    (0xD800..=0xDBFF).contains(&code)
}
/// Returns `true` if `code` is a low (trailing) surrogate, i.e. lies in `0xDC00..=0xDFFF`.
#[inline]
fn is_low_surrogate(code: u16) -> bool {
    (0xDC00..=0xDFFF).contains(&code)
}
624
625	impl<'text> TextSource<'text> for [u16] {
626	type CharIter = Utf16CharIter<'text>;
627	type CharIndexIter = Utf16CharIndexIter<'text>;
628	type IndexLenIter = Utf16IndexLenIter<'text>;
629
630	#[inline]
631	fn len(&self) -> usize {
632	(self as &[u16]).len()
633	}
634	fn char_at(&self, index: usize) -> Option<(char, usize)> {
635	if index >= self.len() {
636	return None;
637	}
638	// Get the indicated code unit and try simply converting it to a char;
639	// this will fail if it is half of a surrogate pair.
640	let c = self[index];
641	if let Some(ch) = char::from_u32(c.into()) {
642	return Some((ch, `1`));
643	}
644	// If it's a low surrogate, and was immediately preceded by a high surrogate,
645	// then we're in the middle of a (valid) character, and should return None.
646	if is_low_surrogate(c) && index > `0` && is_high_surrogate(self[index - `1`]) {
647	return None;
648	}
649	// Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
650	if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
651	if let Ok(ch) = ch {
652	// This must be a surrogate pair, otherwise char::from_u32() above should
653	// have succeeded!
654	debug_assert!(ch.len_utf16() == `2`, "BMP should have already been handled");
655	return Some((ch, ch.len_utf16()));
656	}
657	} else {
658	debug_assert!(
659	`false`,
660	"Why did decode_utf16 return None when we're not at the end?"
661	);
662	return None;
663	}
664	// Failed to decode UTF-16: we must have encountered an unpaired surrogate.
665	// Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
666	// and keep indexing correct.
667	Some((char::REPLACEMENT_CHARACTER, `1`))
668	}
669	#[inline]
670	fn subrange(&self, range: Range<usize>) -> &Self {
671	&(self as &[u16])[range]
672	}
673	#[inline]
674	fn chars(&'text self) -> Self::CharIter {
675	Utf16CharIter::new(self)
676	}
677	#[inline]
678	fn char_indices(&'text self) -> Self::CharIndexIter {
679	Utf16CharIndexIter::new(self)
680	}
681	#[inline]
682	fn indices_lengths(&'text self) -> Self::IndexLenIter {
683	Utf16IndexLenIter::new(self)
684	}
685	#[inline]
686	fn char_len(ch: char) -> usize {
687	ch.len_utf16()
688	}
689	}
690
/// Iterator over UTF-16 text in a `[u16]` slice, returning (index, char_len) tuple.
#[derive(Debug)]
pub struct Utf16IndexLenIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return.
    cur_pos: usize,
}

impl<'text> Utf16IndexLenIter<'text> {
    /// Create an iterator positioned at the start of `text`.
    #[inline]
    pub fn new(text: &'text [u16]) -> Self {
        Utf16IndexLenIter { text, cur_pos: 0 }
    }
}
704
705	impl Iterator for Utf16IndexLenIter<'_> {
706	type Item = (usize, usize);
707
708	#[inline]
709	fn next(&mut self) -> Option<Self::Item> {
710	if let Some((_, char_len: usize)) = self.text.char_at(self.cur_pos) {
711	let result: (usize, usize) = (self.cur_pos, char_len);
712	self.cur_pos += char_len;
713	return Some(result);
714	}
715	None
716	}
717	}
718
/// Iterator over UTF-16 text in a `[u16]` slice, returning (index, char) tuple.
#[derive(Debug)]
pub struct Utf16CharIndexIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return.
    cur_pos: usize,
}

impl<'text> Utf16CharIndexIter<'text> {
    /// Create an iterator positioned at the start of `text`.
    pub fn new(text: &'text [u16]) -> Self {
        Utf16CharIndexIter { text, cur_pos: 0 }
    }
}
731
732	impl Iterator for Utf16CharIndexIter<'_> {
733	type Item = (usize, char);
734
735	fn next(&mut self) -> Option<Self::Item> {
736	if let Some((ch: char, char_len: usize)) = self.text.char_at(self.cur_pos) {
737	let result: (usize, char) = (self.cur_pos, ch);
738	self.cur_pos += char_len;
739	return Some(result);
740	}
741	None
742	}
743	}
744
/// Iterator over UTF-16 text in a `[u16]` slice, returning Unicode chars.
/// (Unlike the other iterators above, this also supports reverse iteration.)
#[derive(Debug)]
pub struct Utf16CharIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return from the front.
    cur_pos: usize,
    /// Code-unit index just past the next character to return from the back.
    end_pos: usize,
}

impl<'text> Utf16CharIter<'text> {
    /// Create an iterator covering the whole of `text`.
    pub fn new(text: &'text [u16]) -> Self {
        Utf16CharIter {
            text,
            cur_pos: 0,
            end_pos: text.len(),
        }
    }
}
763
764	impl Iterator for Utf16CharIter<'_> {
765	type Item = char;
766
767	fn next(&mut self) -> Option<Self::Item> {
768	if let Some((ch: char, char_len: usize)) = self.text.char_at(self.cur_pos) {
769	self.cur_pos += char_len;
770	return Some(ch);
771	}
772	None
773	}
774	}
775
776	impl DoubleEndedIterator for Utf16CharIter<'_> {
777	fn next_back(&mut self) -> Option<Self::Item> {
778	if self.end_pos <= self.cur_pos {
779	return None;
780	}
781	self.end_pos -= `1`;
782	if let Some(ch: char) = char::from_u32(self.text[self.end_pos] as u32) {
783	return Some(ch);
784	}
785	if self.end_pos > self.cur_pos {
786	if let Some((ch: char, char_len: usize)) = self.text.char_at(self.end_pos - `1`) {
787	if char_len == `2` {
788	self.end_pos -= `1`;
789	return Some(ch);
790	}
791	}
792	}
793	Some(char::REPLACEMENT_CHARACTER)
794	}
795	}
796