1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | //! 3.3.4 - 3.3.6. Resolve implicit levels and types. |
11 | |
12 | #[cfg (not(feature = "smallvec" ))] |
13 | use alloc::vec::Vec; |
14 | use core::cmp::max; |
15 | #[cfg (feature = "smallvec" )] |
16 | use smallvec::SmallVec; |
17 | |
18 | use super::char_data::BidiClass::{self, *}; |
19 | use super::level::Level; |
20 | use super::prepare::{not_removed_by_x9, IsolatingRunSequence}; |
21 | use super::{BidiDataSource, TextSource}; |
22 | |
23 | /// 3.3.4 Resolving Weak Types |
24 | /// |
25 | /// <http://www.unicode.org/reports/tr9/#Resolving_Weak_Types> |
26 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
27 | pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( |
28 | text: &'a T, |
29 | sequence: &IsolatingRunSequence, |
30 | processing_classes: &mut [BidiClass], |
31 | ) { |
32 | // Note: The spec treats these steps as individual passes that are applied one after the other |
33 | // on the entire IsolatingRunSequence at once. We instead collapse it into a single iteration, |
34 | // which is straightforward for rules that are based on the state of the current character, but not |
35 | // for rules that care about surrounding characters. To deal with them, we retain additional state |
36 | // about previous character classes that may have since been changed by later rules. |
37 | |
38 | // The previous class for the purposes of rule W4/W6, not tracking changes made after or during W4. |
39 | let mut prev_class_before_w4 = sequence.sos; |
40 | // The previous class for the purposes of rule W5. |
41 | let mut prev_class_before_w5 = sequence.sos; |
42 | // The previous class for the purposes of rule W1, not tracking changes from any other rules. |
43 | let mut prev_class_before_w1 = sequence.sos; |
44 | let mut last_strong_is_al = false; |
45 | #[cfg (feature = "smallvec" )] |
46 | let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 |
47 | #[cfg (not(feature = "smallvec" ))] |
48 | let mut et_run_indices = Vec::new(); // for W5 |
49 | #[cfg (feature = "smallvec" )] |
50 | let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
51 | #[cfg (not(feature = "smallvec" ))] |
52 | let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
53 | |
54 | for (run_index, level_run) in sequence.runs.iter().enumerate() { |
55 | for i in &mut level_run.clone() { |
56 | if processing_classes[i] == BN { |
57 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
58 | // Keeps track of bn runs for W5 in case we see an ET. |
59 | bn_run_indices.push(i); |
60 | // BNs aren't real, skip over them. |
61 | continue; |
62 | } |
63 | |
64 | // Store the processing class of all rules before W2/W1. |
65 | // Used to keep track of the last strong character for W2. W3 is able to insert new strong |
66 | // characters, so we don't want to be misled by it. |
67 | let mut w2_processing_class = processing_classes[i]; |
68 | |
69 | // <http://www.unicode.org/reports/tr9/#W1> |
70 | // |
71 | |
72 | if processing_classes[i] == NSM { |
73 | processing_classes[i] = match prev_class_before_w1 { |
74 | RLI | LRI | FSI | PDI => ON, |
75 | _ => prev_class_before_w1, |
76 | }; |
77 | // W1 occurs before W2, update this. |
78 | w2_processing_class = processing_classes[i]; |
79 | } |
80 | |
81 | prev_class_before_w1 = processing_classes[i]; |
82 | |
83 | // <http://www.unicode.org/reports/tr9/#W2> |
84 | // <http://www.unicode.org/reports/tr9/#W3> |
85 | // |
86 | match processing_classes[i] { |
87 | EN => { |
88 | if last_strong_is_al { |
89 | // W2. If previous strong char was AL, change EN to AN. |
90 | processing_classes[i] = AN; |
91 | } |
92 | } |
93 | // W3. |
94 | AL => processing_classes[i] = R, |
95 | _ => {} |
96 | } |
97 | |
98 | // update last_strong_is_al. |
99 | match w2_processing_class { |
100 | L | R => { |
101 | last_strong_is_al = false; |
102 | } |
103 | AL => { |
104 | last_strong_is_al = true; |
105 | } |
106 | _ => {} |
107 | } |
108 | |
109 | let class_before_w456 = processing_classes[i]; |
110 | |
111 | // <http://www.unicode.org/reports/tr9/#W4> |
112 | // <http://www.unicode.org/reports/tr9/#W5> |
113 | // <http://www.unicode.org/reports/tr9/#W6> (separators only) |
114 | // (see below for W6 terminator code) |
115 | // |
116 | match processing_classes[i] { |
117 | // <http://www.unicode.org/reports/tr9/#W6> |
118 | EN => { |
119 | // W5. If a run of ETs is adjacent to an EN, change the ETs to EN. |
120 | for j in &et_run_indices { |
121 | processing_classes[*j] = EN; |
122 | } |
123 | et_run_indices.clear(); |
124 | } |
125 | |
126 | // <http://www.unicode.org/reports/tr9/#W4> |
127 | // <http://www.unicode.org/reports/tr9/#W6> |
128 | ES | CS => { |
129 | // See https://github.com/servo/unicode-bidi/issues/86 for improving this. |
130 | // We want to make sure we check the correct next character by skipping past the rest |
131 | // of this one. |
132 | if let Some((_, char_len)) = text.char_at(i) { |
133 | let mut next_class = sequence |
134 | .iter_forwards_from(i + char_len, run_index) |
135 | .map(|j| processing_classes[j]) |
136 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
137 | .find(not_removed_by_x9) |
138 | .unwrap_or(sequence.eos); |
139 | if next_class == EN && last_strong_is_al { |
140 | // Apply W2 to next_class. We know that last_strong_is_al |
141 | // has no chance of changing on this character so we can still assume its value |
142 | // will be the same by the time we get to it. |
143 | next_class = AN; |
144 | } |
145 | processing_classes[i] = |
146 | match (prev_class_before_w4, processing_classes[i], next_class) { |
147 | // W4 |
148 | (EN, ES, EN) | (EN, CS, EN) => EN, |
149 | // W4 |
150 | (AN, CS, AN) => AN, |
151 | // W6 (separators only) |
152 | (_, _, _) => ON, |
153 | }; |
154 | |
155 | // W6 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
156 | // We have to do this before W5 gets its grubby hands on these characters and thinks |
157 | // they're part of an ET run. |
158 | // We check for ON to ensure that we had hit the W6 branch above, since this `ES | CS` match |
159 | // arm handles both W4 and W6. |
160 | if processing_classes[i] == ON { |
161 | for idx in sequence.iter_backwards_from(i, run_index) { |
162 | let class = &mut processing_classes[idx]; |
163 | if *class != BN { |
164 | break; |
165 | } |
166 | *class = ON; |
167 | } |
168 | for idx in sequence.iter_forwards_from(i + char_len, run_index) { |
169 | let class = &mut processing_classes[idx]; |
170 | if *class != BN { |
171 | break; |
172 | } |
173 | *class = ON; |
174 | } |
175 | } |
176 | } else { |
177 | // We're in the middle of a character, copy over work done for previous bytes |
178 | // since it's going to be the same answer. |
179 | processing_classes[i] = processing_classes[i - 1]; |
180 | } |
181 | } |
182 | // <http://www.unicode.org/reports/tr9/#W5> |
183 | ET => { |
184 | match prev_class_before_w5 { |
185 | EN => processing_classes[i] = EN, |
186 | _ => { |
187 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
188 | // If there was a BN run before this, that's now a part of this ET run. |
189 | et_run_indices.extend(bn_run_indices.clone()); |
190 | |
191 | // In case this is followed by an EN. |
192 | et_run_indices.push(i); |
193 | } |
194 | } |
195 | } |
196 | _ => {} |
197 | } |
198 | |
199 | // Common loop iteration code |
200 | // |
201 | |
202 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
203 | // BN runs would have already continued the loop, clear them before we get to the next one. |
204 | bn_run_indices.clear(); |
205 | |
206 | // W6 above only deals with separators, so it doesn't change anything W5 cares about, |
207 | // so we still can update this after running that part of W6. |
208 | prev_class_before_w5 = processing_classes[i]; |
209 | |
210 | // <http://www.unicode.org/reports/tr9/#W6> (terminators only) |
211 | // (see above for W6 separator code) |
212 | // |
213 | if prev_class_before_w5 != ET { |
214 | // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. |
215 | for j in &et_run_indices { |
216 | processing_classes[*j] = ON; |
217 | } |
218 | et_run_indices.clear(); |
219 | } |
220 | |
221 | // We stashed this before W4/5/6 could get their grubby hands on it, and it's not |
222 | // used in the W6 terminator code below so we can update it now. |
223 | prev_class_before_w4 = class_before_w456; |
224 | } |
225 | } |
226 | // Rerun this check in case we ended with a sequence of BNs (i.e., we'd never |
227 | // hit the end of the for loop above). |
228 | // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. |
229 | for j in &et_run_indices { |
230 | processing_classes[*j] = ON; |
231 | } |
232 | et_run_indices.clear(); |
233 | |
234 | // W7. If the previous strong char was L, change EN to L. |
235 | let mut last_strong_is_l = sequence.sos == L; |
236 | for i in sequence.runs.iter().cloned().flatten() { |
237 | match processing_classes[i] { |
238 | EN if last_strong_is_l => { |
239 | processing_classes[i] = L; |
240 | } |
241 | L => { |
242 | last_strong_is_l = true; |
243 | } |
244 | R | AL => { |
245 | last_strong_is_l = false; |
246 | } |
247 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
248 | // Already scanning past BN here. |
249 | _ => {} |
250 | } |
251 | } |
252 | } |
253 | |
254 | #[cfg (feature = "smallvec" )] |
255 | type BracketPairVec = SmallVec<[BracketPair; 8]>; |
256 | #[cfg (not(feature = "smallvec" ))] |
257 | type BracketPairVec = Vec<BracketPair>; |
258 | |
259 | /// 3.3.5 Resolving Neutral Types |
260 | /// |
261 | /// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types> |
262 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
263 | pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( |
264 | text: &'a T, |
265 | data_source: &D, |
266 | sequence: &IsolatingRunSequence, |
267 | levels: &[Level], |
268 | original_classes: &[BidiClass], |
269 | processing_classes: &mut [BidiClass], |
270 | ) { |
271 | // e = embedding direction |
272 | let e: BidiClass = levels[sequence.runs[0].start].bidi_class(); |
273 | let not_e = if e == BidiClass::L { |
274 | BidiClass::R |
275 | } else { |
276 | BidiClass::L |
277 | }; |
278 | // N0. Process bracket pairs. |
279 | |
280 | // > Identify the bracket pairs in the current isolating run sequence according to BD16. |
281 | // We use processing_classes, not original_classes, due to BD14/BD15 |
282 | let mut bracket_pairs = BracketPairVec::new(); |
283 | identify_bracket_pairs( |
284 | text, |
285 | data_source, |
286 | sequence, |
287 | processing_classes, |
288 | &mut bracket_pairs, |
289 | ); |
290 | |
291 | // > For each bracket-pair element in the list of pairs of text positions |
292 | // |
293 | // Note: Rust ranges are interpreted as [start..end), be careful using `pair` directly |
294 | // for indexing as it will include the opening bracket pair but not the closing one. |
295 | for pair in bracket_pairs { |
296 | #[cfg (feature = "std" )] |
297 | debug_assert!( |
298 | pair.start < processing_classes.len(), |
299 | "identify_bracket_pairs returned a range that is out of bounds!" |
300 | ); |
301 | #[cfg (feature = "std" )] |
302 | debug_assert!( |
303 | pair.end < processing_classes.len(), |
304 | "identify_bracket_pairs returned a range that is out of bounds!" |
305 | ); |
306 | let mut found_e = false; |
307 | let mut found_not_e = false; |
308 | let mut class_to_set = None; |
309 | |
310 | let start_char_len = |
311 | T::char_len(text.subrange(pair.start..pair.end).chars().next().unwrap()); |
312 | // > Inspect the bidirectional types of the characters enclosed within the bracket pair. |
313 | // |
314 | // `pair` is [start, end) so we will end up processing the opening character but not the closing one. |
315 | // |
316 | for enclosed_i in sequence.iter_forwards_from(pair.start + start_char_len, pair.start_run) { |
317 | if enclosed_i >= pair.end { |
318 | #[cfg (feature = "std" )] |
319 | debug_assert!( |
320 | enclosed_i == pair.end, |
321 | "If we skipped past this, the iterator is broken" |
322 | ); |
323 | break; |
324 | } |
325 | let class = processing_classes[enclosed_i]; |
326 | if class == e { |
327 | found_e = true; |
328 | } else if class == not_e { |
329 | found_not_e = true; |
330 | } else if matches!(class, BidiClass::EN | BidiClass::AN) { |
331 | // > Within this scope, bidirectional types EN and AN are treated as R. |
332 | if e == BidiClass::L { |
333 | found_not_e = true; |
334 | } else { |
335 | found_e = true; |
336 | } |
337 | } |
338 | |
339 | // If we have found a character with the class of the embedding direction |
340 | // we can bail early. |
341 | if found_e { |
342 | break; |
343 | } |
344 | } |
345 | // > If any strong type (either L or R) matching the embedding direction is found |
346 | if found_e { |
347 | // > .. set the type for both brackets in the pair to match the embedding direction |
348 | class_to_set = Some(e); |
349 | // > Otherwise, if there is a strong type it must be opposite the embedding direction |
350 | } else if found_not_e { |
351 | // > Therefore, test for an established context with a preceding strong type by |
352 | // > checking backwards before the opening paired bracket |
353 | // > until the first strong type (L, R, or sos) is found. |
354 | // (see note above about processing_classes and character boundaries) |
355 | let mut previous_strong = sequence |
356 | .iter_backwards_from(pair.start, pair.start_run) |
357 | .map(|i| processing_classes[i]) |
358 | .find(|class| { |
359 | matches!( |
360 | class, |
361 | BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN |
362 | ) |
363 | }) |
364 | .unwrap_or(sequence.sos); |
365 | |
366 | // > Within this scope, bidirectional types EN and AN are treated as R. |
367 | if matches!(previous_strong, BidiClass::EN | BidiClass::AN) { |
368 | previous_strong = BidiClass::R; |
369 | } |
370 | |
371 | // > If the preceding strong type is also opposite the embedding direction, |
372 | // > context is established, |
373 | // > so set the type for both brackets in the pair to that direction. |
374 | // AND |
375 | // > Otherwise set the type for both brackets in the pair to the embedding direction. |
376 | // > Either way it gets set to previous_strong |
377 | // |
378 | // Both branches amount to setting the type to the strong type. |
379 | class_to_set = Some(previous_strong); |
380 | } |
381 | |
382 | if let Some(class_to_set) = class_to_set { |
383 | // Update all processing classes corresponding to the start and end elements, as requested. |
384 | // We should include all bytes of the character, not the first one. |
385 | let end_char_len = |
386 | T::char_len(text.subrange(pair.end..text.len()).chars().next().unwrap()); |
387 | for class in &mut processing_classes[pair.start..pair.start + start_char_len] { |
388 | *class = class_to_set; |
389 | } |
390 | for class in &mut processing_classes[pair.end..pair.end + end_char_len] { |
391 | *class = class_to_set; |
392 | } |
393 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
394 | for idx in sequence.iter_backwards_from(pair.start, pair.start_run) { |
395 | let class = &mut processing_classes[idx]; |
396 | if *class != BN { |
397 | break; |
398 | } |
399 | *class = class_to_set; |
400 | } |
401 | // > Any number of characters that had original bidirectional character type NSM prior to the application of |
402 | // > W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match the type of their preceding bracket. |
403 | |
404 | // This rule deals with sequences of NSMs, so we can just update them all at once, we don't need to worry |
405 | // about character boundaries. We do need to be careful to skip the full set of bytes for the parentheses characters. |
406 | let nsm_start = pair.start + start_char_len; |
407 | for idx in sequence.iter_forwards_from(nsm_start, pair.start_run) { |
408 | let class = original_classes[idx]; |
409 | if class == BidiClass::NSM || processing_classes[idx] == BN { |
410 | processing_classes[idx] = class_to_set; |
411 | } else { |
412 | break; |
413 | } |
414 | } |
415 | let nsm_end = pair.end + end_char_len; |
416 | for idx in sequence.iter_forwards_from(nsm_end, pair.end_run) { |
417 | let class = original_classes[idx]; |
418 | if class == BidiClass::NSM || processing_classes[idx] == BN { |
419 | processing_classes[idx] = class_to_set; |
420 | } else { |
421 | break; |
422 | } |
423 | } |
424 | } |
425 | // > Otherwise, there are no strong types within the bracket pair |
426 | // > Therefore, do not set the type for that bracket pair |
427 | } |
428 | |
429 | // N1 and N2. |
430 | // Indices of every byte in this isolating run sequence |
431 | let mut indices = sequence.runs.iter().flat_map(Clone::clone); |
432 | let mut prev_class = sequence.sos; |
433 | while let Some(mut i) = indices.next() { |
434 | // Process sequences of NI characters. |
435 | #[cfg (feature = "smallvec" )] |
436 | let mut ni_run = SmallVec::<[usize; 8]>::new(); |
437 | #[cfg (not(feature = "smallvec" ))] |
438 | let mut ni_run = Vec::new(); |
439 | // The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
440 | if is_NI(processing_classes[i]) || processing_classes[i] == BN { |
441 | // Consume a run of consecutive NI characters. |
442 | ni_run.push(i); |
443 | let mut next_class; |
444 | loop { |
445 | match indices.next() { |
446 | Some(j) => { |
447 | i = j; |
448 | next_class = processing_classes[j]; |
449 | // The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
450 | if is_NI(next_class) || next_class == BN { |
451 | ni_run.push(i); |
452 | } else { |
453 | break; |
454 | } |
455 | } |
456 | None => { |
457 | next_class = sequence.eos; |
458 | break; |
459 | } |
460 | }; |
461 | } |
462 | // N1-N2. |
463 | // |
464 | // <http://www.unicode.org/reports/tr9/#N1> |
465 | // <http://www.unicode.org/reports/tr9/#N2> |
466 | let new_class = match (prev_class, next_class) { |
467 | (L, L) => L, |
468 | (R, R) |
469 | | (R, AN) |
470 | | (R, EN) |
471 | | (AN, R) |
472 | | (AN, AN) |
473 | | (AN, EN) |
474 | | (EN, R) |
475 | | (EN, AN) |
476 | | (EN, EN) => R, |
477 | (_, _) => e, |
478 | }; |
479 | for j in &ni_run { |
480 | processing_classes[*j] = new_class; |
481 | } |
482 | ni_run.clear(); |
483 | } |
484 | prev_class = processing_classes[i]; |
485 | } |
486 | } |
487 | |
/// One matched pair of brackets found inside an isolating run sequence.
struct BracketPair {
    /// Position of the opening bracket, as an index into the text.
    start: usize,
    /// Position of the closing bracket, as an index into the text.
    end: usize,
    /// Which run of the run sequence contains the opening bracket.
    start_run: usize,
    /// Which run of the run sequence contains the closing bracket.
    end_run: usize,
}
498 | /// 3.1.3 Identifying Bracket Pairs |
499 | /// |
500 | /// Returns all paired brackets in the source, as indices into the |
501 | /// text source. |
502 | /// |
503 | /// <https://www.unicode.org/reports/tr9/#BD16> |
504 | fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( |
505 | text: &'a T, |
506 | data_source: &D, |
507 | run_sequence: &IsolatingRunSequence, |
508 | original_classes: &[BidiClass], |
509 | bracket_pairs: &mut BracketPairVec, |
510 | ) { |
511 | #[cfg (feature = "smallvec" )] |
512 | let mut stack = SmallVec::<[(char, usize, usize); 8]>::new(); |
513 | #[cfg (not(feature = "smallvec" ))] |
514 | let mut stack = Vec::new(); |
515 | |
516 | for (run_index, level_run) in run_sequence.runs.iter().enumerate() { |
517 | for (i, ch) in text.subrange(level_run.clone()).char_indices() { |
518 | let actual_index = level_run.start + i; |
519 | |
520 | // All paren characters are ON. |
521 | // From BidiBrackets.txt: |
522 | // > The Unicode property value stability policy guarantees that characters |
523 | // > which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y |
524 | if original_classes[actual_index] != BidiClass::ON { |
525 | continue; |
526 | } |
527 | |
528 | if let Some(matched) = data_source.bidi_matched_opening_bracket(ch) { |
529 | if matched.is_open { |
530 | // > If an opening paired bracket is found ... |
531 | |
532 | // > ... and there is no room in the stack, |
533 | // > stop processing BD16 for the remainder of the isolating run sequence. |
534 | if stack.len() >= 63 { |
535 | break; |
536 | } |
537 | // > ... push its Bidi_Paired_Bracket property value and its text position onto the stack |
538 | stack.push((matched.opening, actual_index, run_index)) |
539 | } else { |
540 | // > If a closing paired bracket is found, do the following |
541 | |
542 | // > Declare a variable that holds a reference to the current stack element |
543 | // > and initialize it with the top element of the stack. |
544 | // AND |
545 | // > Else, if the current stack element is not at the bottom of the stack |
546 | for (stack_index, element) in stack.iter().enumerate().rev() { |
547 | // > Compare the closing paired bracket being inspected or its canonical |
548 | // > equivalent to the bracket in the current stack element. |
549 | if element.0 == matched.opening { |
550 | // > If the values match, meaning the two characters form a bracket pair, then |
551 | |
552 | // > Append the text position in the current stack element together with the |
553 | // > text position of the closing paired bracket to the list. |
554 | let pair = BracketPair { |
555 | start: element.1, |
556 | end: actual_index, |
557 | start_run: element.2, |
558 | end_run: run_index, |
559 | }; |
560 | bracket_pairs.push(pair); |
561 | |
562 | // > Pop the stack through the current stack element inclusively. |
563 | stack.truncate(stack_index); |
564 | break; |
565 | } |
566 | } |
567 | } |
568 | } |
569 | } |
570 | } |
571 | // > Sort the list of pairs of text positions in ascending order based on |
572 | // > the text position of the opening paired bracket. |
573 | bracket_pairs.sort_by_key(|r| r.start); |
574 | } |
575 | |
576 | /// 3.3.6 Resolving Implicit Levels |
577 | /// |
578 | /// Returns the maximum embedding level in the paragraph. |
579 | /// |
580 | /// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels> |
581 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
582 | pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level { |
583 | let mut max_level: Level = Level::ltr(); |
584 | assert_eq!(processing_classes.len(), levels.len()); |
585 | for i: usize in 0..levels.len() { |
586 | match (levels[i].is_rtl(), processing_classes[i]) { |
587 | (false, AN) | (false, EN) => levels[i].raise(2).expect(msg:"Level number error" ), |
588 | (false, R) | (true, L) | (true, EN) | (true, AN) => { |
589 | levels[i].raise(1).expect(msg:"Level number error" ) |
590 | } |
591 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> handled here |
592 | (_, _) => {} |
593 | } |
594 | max_level = max(v1:max_level, v2:levels[i]); |
595 | } |
596 | |
597 | max_level |
598 | } |
599 | |
600 | /// Neutral or Isolate formatting character (B, S, WS, ON, FSI, LRI, RLI, PDI) |
601 | /// |
602 | /// <http://www.unicode.org/reports/tr9/#NI> |
603 | #[allow (non_snake_case)] |
604 | fn is_NI(class: BidiClass) -> bool { |
605 | matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) |
606 | } |
607 | |