// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! 3.3.4 - 3.3.6. Resolve implicit levels and types.
| 11 | |
| 12 | #[cfg (not(feature = "smallvec" ))] |
| 13 | use alloc::vec::Vec; |
| 14 | use core::cmp::max; |
| 15 | #[cfg (feature = "smallvec" )] |
| 16 | use smallvec::SmallVec; |
| 17 | |
| 18 | use super::char_data::BidiClass::{self, *}; |
| 19 | use super::level::Level; |
| 20 | use super::prepare::{not_removed_by_x9, IsolatingRunSequence}; |
| 21 | use super::{BidiDataSource, TextSource}; |
| 22 | |
| 23 | /// 3.3.4 Resolving Weak Types |
| 24 | /// |
| 25 | /// <http://www.unicode.org/reports/tr9/#Resolving_Weak_Types> |
| 26 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
| 27 | pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( |
| 28 | text: &'a T, |
| 29 | sequence: &IsolatingRunSequence, |
| 30 | processing_classes: &mut [BidiClass], |
| 31 | ) { |
| 32 | // Note: The spec treats these steps as individual passes that are applied one after the other |
| 33 | // on the entire IsolatingRunSequence at once. We instead collapse it into a single iteration, |
| 34 | // which is straightforward for rules that are based on the state of the current character, but not |
| 35 | // for rules that care about surrounding characters. To deal with them, we retain additional state |
| 36 | // about previous character classes that may have since been changed by later rules. |
| 37 | |
| 38 | // The previous class for the purposes of rule W4/W6, not tracking changes made after or during W4. |
| 39 | let mut prev_class_before_w4 = sequence.sos; |
| 40 | // The previous class for the purposes of rule W5. |
| 41 | let mut prev_class_before_w5 = sequence.sos; |
| 42 | // The previous class for the purposes of rule W1, not tracking changes from any other rules. |
| 43 | let mut prev_class_before_w1 = sequence.sos; |
| 44 | let mut last_strong_is_al = false; |
| 45 | #[cfg (feature = "smallvec" )] |
| 46 | let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 |
| 47 | #[cfg (not(feature = "smallvec" ))] |
| 48 | let mut et_run_indices = Vec::new(); // for W5 |
| 49 | #[cfg (feature = "smallvec" )] |
| 50 | let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 51 | #[cfg (not(feature = "smallvec" ))] |
| 52 | let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 53 | |
| 54 | for (run_index, level_run) in sequence.runs.iter().enumerate() { |
| 55 | for i in &mut level_run.clone() { |
| 56 | if processing_classes[i] == BN { |
| 57 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 58 | // Keeps track of bn runs for W5 in case we see an ET. |
| 59 | bn_run_indices.push(i); |
| 60 | // BNs aren't real, skip over them. |
| 61 | continue; |
| 62 | } |
| 63 | |
| 64 | // Store the processing class of all rules before W2/W1. |
| 65 | // Used to keep track of the last strong character for W2. W3 is able to insert new strong |
| 66 | // characters, so we don't want to be misled by it. |
| 67 | let mut w2_processing_class = processing_classes[i]; |
| 68 | |
| 69 | // <http://www.unicode.org/reports/tr9/#W1> |
| 70 | // |
| 71 | |
| 72 | if processing_classes[i] == NSM { |
| 73 | processing_classes[i] = match prev_class_before_w1 { |
| 74 | RLI | LRI | FSI | PDI => ON, |
| 75 | _ => prev_class_before_w1, |
| 76 | }; |
| 77 | // W1 occurs before W2, update this. |
| 78 | w2_processing_class = processing_classes[i]; |
| 79 | } |
| 80 | |
| 81 | prev_class_before_w1 = processing_classes[i]; |
| 82 | |
| 83 | // <http://www.unicode.org/reports/tr9/#W2> |
| 84 | // <http://www.unicode.org/reports/tr9/#W3> |
| 85 | // |
| 86 | match processing_classes[i] { |
| 87 | EN => { |
| 88 | if last_strong_is_al { |
| 89 | // W2. If previous strong char was AL, change EN to AN. |
| 90 | processing_classes[i] = AN; |
| 91 | } |
| 92 | } |
| 93 | // W3. |
| 94 | AL => processing_classes[i] = R, |
| 95 | _ => {} |
| 96 | } |
| 97 | |
| 98 | // update last_strong_is_al. |
| 99 | match w2_processing_class { |
| 100 | L | R => { |
| 101 | last_strong_is_al = false; |
| 102 | } |
| 103 | AL => { |
| 104 | last_strong_is_al = true; |
| 105 | } |
| 106 | _ => {} |
| 107 | } |
| 108 | |
| 109 | let class_before_w456 = processing_classes[i]; |
| 110 | |
| 111 | // <http://www.unicode.org/reports/tr9/#W4> |
| 112 | // <http://www.unicode.org/reports/tr9/#W5> |
| 113 | // <http://www.unicode.org/reports/tr9/#W6> (separators only) |
| 114 | // (see below for W6 terminator code) |
| 115 | // |
| 116 | match processing_classes[i] { |
| 117 | // <http://www.unicode.org/reports/tr9/#W6> |
| 118 | EN => { |
| 119 | // W5. If a run of ETs is adjacent to an EN, change the ETs to EN. |
| 120 | for j in &et_run_indices { |
| 121 | processing_classes[*j] = EN; |
| 122 | } |
| 123 | et_run_indices.clear(); |
| 124 | } |
| 125 | |
| 126 | // <http://www.unicode.org/reports/tr9/#W4> |
| 127 | // <http://www.unicode.org/reports/tr9/#W6> |
| 128 | ES | CS => { |
| 129 | // See https://github.com/servo/unicode-bidi/issues/86 for improving this. |
| 130 | // We want to make sure we check the correct next character by skipping past the rest |
| 131 | // of this one. |
| 132 | if let Some((_, char_len)) = text.char_at(i) { |
| 133 | let mut next_class = sequence |
| 134 | .iter_forwards_from(i + char_len, run_index) |
| 135 | .map(|j| processing_classes[j]) |
| 136 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 137 | .find(not_removed_by_x9) |
| 138 | .unwrap_or(sequence.eos); |
| 139 | if next_class == EN && last_strong_is_al { |
| 140 | // Apply W2 to next_class. We know that last_strong_is_al |
| 141 | // has no chance of changing on this character so we can still assume its value |
| 142 | // will be the same by the time we get to it. |
| 143 | next_class = AN; |
| 144 | } |
| 145 | processing_classes[i] = |
| 146 | match (prev_class_before_w4, processing_classes[i], next_class) { |
| 147 | // W4 |
| 148 | (EN, ES, EN) | (EN, CS, EN) => EN, |
| 149 | // W4 |
| 150 | (AN, CS, AN) => AN, |
| 151 | // W6 (separators only) |
| 152 | (_, _, _) => ON, |
| 153 | }; |
| 154 | |
| 155 | // W6 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 156 | // We have to do this before W5 gets its grubby hands on these characters and thinks |
| 157 | // they're part of an ET run. |
| 158 | // We check for ON to ensure that we had hit the W6 branch above, since this `ES | CS` match |
| 159 | // arm handles both W4 and W6. |
| 160 | if processing_classes[i] == ON { |
| 161 | for idx in sequence.iter_backwards_from(i, run_index) { |
| 162 | let class = &mut processing_classes[idx]; |
| 163 | if *class != BN { |
| 164 | break; |
| 165 | } |
| 166 | *class = ON; |
| 167 | } |
| 168 | for idx in sequence.iter_forwards_from(i + char_len, run_index) { |
| 169 | let class = &mut processing_classes[idx]; |
| 170 | if *class != BN { |
| 171 | break; |
| 172 | } |
| 173 | *class = ON; |
| 174 | } |
| 175 | } |
| 176 | } else { |
| 177 | // We're in the middle of a character, copy over work done for previous bytes |
| 178 | // since it's going to be the same answer. |
| 179 | processing_classes[i] = processing_classes[i - 1]; |
| 180 | } |
| 181 | } |
| 182 | // <http://www.unicode.org/reports/tr9/#W5> |
| 183 | ET => { |
| 184 | match prev_class_before_w5 { |
| 185 | EN => processing_classes[i] = EN, |
| 186 | _ => { |
| 187 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 188 | // If there was a BN run before this, that's now a part of this ET run. |
| 189 | et_run_indices.extend(bn_run_indices.clone()); |
| 190 | |
| 191 | // In case this is followed by an EN. |
| 192 | et_run_indices.push(i); |
| 193 | } |
| 194 | } |
| 195 | } |
| 196 | _ => {} |
| 197 | } |
| 198 | |
| 199 | // Common loop iteration code |
| 200 | // |
| 201 | |
| 202 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 203 | // BN runs would have already continued the loop, clear them before we get to the next one. |
| 204 | bn_run_indices.clear(); |
| 205 | |
| 206 | // W6 above only deals with separators, so it doesn't change anything W5 cares about, |
| 207 | // so we still can update this after running that part of W6. |
| 208 | prev_class_before_w5 = processing_classes[i]; |
| 209 | |
| 210 | // <http://www.unicode.org/reports/tr9/#W6> (terminators only) |
| 211 | // (see above for W6 separator code) |
| 212 | // |
| 213 | if prev_class_before_w5 != ET { |
| 214 | // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. |
| 215 | for j in &et_run_indices { |
| 216 | processing_classes[*j] = ON; |
| 217 | } |
| 218 | et_run_indices.clear(); |
| 219 | } |
| 220 | |
| 221 | // We stashed this before W4/5/6 could get their grubby hands on it, and it's not |
| 222 | // used in the W6 terminator code below so we can update it now. |
| 223 | prev_class_before_w4 = class_before_w456; |
| 224 | } |
| 225 | } |
| 226 | // Rerun this check in case we ended with a sequence of BNs (i.e., we'd never |
| 227 | // hit the end of the for loop above). |
| 228 | // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. |
| 229 | for j in &et_run_indices { |
| 230 | processing_classes[*j] = ON; |
| 231 | } |
| 232 | et_run_indices.clear(); |
| 233 | |
| 234 | // W7. If the previous strong char was L, change EN to L. |
| 235 | let mut last_strong_is_l = sequence.sos == L; |
| 236 | for i in sequence.runs.iter().cloned().flatten() { |
| 237 | match processing_classes[i] { |
| 238 | EN if last_strong_is_l => { |
| 239 | processing_classes[i] = L; |
| 240 | } |
| 241 | L => { |
| 242 | last_strong_is_l = true; |
| 243 | } |
| 244 | R | AL => { |
| 245 | last_strong_is_l = false; |
| 246 | } |
| 247 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 248 | // Already scanning past BN here. |
| 249 | _ => {} |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | #[cfg (feature = "smallvec" )] |
| 255 | type BracketPairVec = SmallVec<[BracketPair; 8]>; |
| 256 | #[cfg (not(feature = "smallvec" ))] |
| 257 | type BracketPairVec = Vec<BracketPair>; |
| 258 | |
| 259 | /// 3.3.5 Resolving Neutral Types |
| 260 | /// |
| 261 | /// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types> |
| 262 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
| 263 | pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( |
| 264 | text: &'a T, |
| 265 | data_source: &D, |
| 266 | sequence: &IsolatingRunSequence, |
| 267 | levels: &[Level], |
| 268 | original_classes: &[BidiClass], |
| 269 | processing_classes: &mut [BidiClass], |
| 270 | ) { |
| 271 | // e = embedding direction |
| 272 | let e: BidiClass = levels[sequence.runs[0].start].bidi_class(); |
| 273 | let not_e = if e == BidiClass::L { |
| 274 | BidiClass::R |
| 275 | } else { |
| 276 | BidiClass::L |
| 277 | }; |
| 278 | // N0. Process bracket pairs. |
| 279 | |
| 280 | // > Identify the bracket pairs in the current isolating run sequence according to BD16. |
| 281 | // We use processing_classes, not original_classes, due to BD14/BD15 |
| 282 | let mut bracket_pairs = BracketPairVec::new(); |
| 283 | identify_bracket_pairs( |
| 284 | text, |
| 285 | data_source, |
| 286 | sequence, |
| 287 | processing_classes, |
| 288 | &mut bracket_pairs, |
| 289 | ); |
| 290 | |
| 291 | // > For each bracket-pair element in the list of pairs of text positions |
| 292 | // |
| 293 | // Note: Rust ranges are interpreted as [start..end), be careful using `pair` directly |
| 294 | // for indexing as it will include the opening bracket pair but not the closing one. |
| 295 | for pair in bracket_pairs { |
| 296 | #[cfg (feature = "std" )] |
| 297 | debug_assert!( |
| 298 | pair.start < processing_classes.len(), |
| 299 | "identify_bracket_pairs returned a range that is out of bounds!" |
| 300 | ); |
| 301 | #[cfg (feature = "std" )] |
| 302 | debug_assert!( |
| 303 | pair.end < processing_classes.len(), |
| 304 | "identify_bracket_pairs returned a range that is out of bounds!" |
| 305 | ); |
| 306 | let mut found_e = false; |
| 307 | let mut found_not_e = false; |
| 308 | let mut class_to_set = None; |
| 309 | |
| 310 | let start_char_len = |
| 311 | T::char_len(text.subrange(pair.start..pair.end).chars().next().unwrap()); |
| 312 | // > Inspect the bidirectional types of the characters enclosed within the bracket pair. |
| 313 | // |
| 314 | // `pair` is [start, end) so we will end up processing the opening character but not the closing one. |
| 315 | // |
| 316 | for enclosed_i in sequence.iter_forwards_from(pair.start + start_char_len, pair.start_run) { |
| 317 | if enclosed_i >= pair.end { |
| 318 | #[cfg (feature = "std" )] |
| 319 | debug_assert!( |
| 320 | enclosed_i == pair.end, |
| 321 | "If we skipped past this, the iterator is broken" |
| 322 | ); |
| 323 | break; |
| 324 | } |
| 325 | let class = processing_classes[enclosed_i]; |
| 326 | if class == e { |
| 327 | found_e = true; |
| 328 | } else if class == not_e { |
| 329 | found_not_e = true; |
| 330 | } else if matches!(class, BidiClass::EN | BidiClass::AN) { |
| 331 | // > Within this scope, bidirectional types EN and AN are treated as R. |
| 332 | if e == BidiClass::L { |
| 333 | found_not_e = true; |
| 334 | } else { |
| 335 | found_e = true; |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | // If we have found a character with the class of the embedding direction |
| 340 | // we can bail early. |
| 341 | if found_e { |
| 342 | break; |
| 343 | } |
| 344 | } |
| 345 | // > If any strong type (either L or R) matching the embedding direction is found |
| 346 | if found_e { |
| 347 | // > .. set the type for both brackets in the pair to match the embedding direction |
| 348 | class_to_set = Some(e); |
| 349 | // > Otherwise, if there is a strong type it must be opposite the embedding direction |
| 350 | } else if found_not_e { |
| 351 | // > Therefore, test for an established context with a preceding strong type by |
| 352 | // > checking backwards before the opening paired bracket |
| 353 | // > until the first strong type (L, R, or sos) is found. |
| 354 | // (see note above about processing_classes and character boundaries) |
| 355 | let mut previous_strong = sequence |
| 356 | .iter_backwards_from(pair.start, pair.start_run) |
| 357 | .map(|i| processing_classes[i]) |
| 358 | .find(|class| { |
| 359 | matches!( |
| 360 | class, |
| 361 | BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN |
| 362 | ) |
| 363 | }) |
| 364 | .unwrap_or(sequence.sos); |
| 365 | |
| 366 | // > Within this scope, bidirectional types EN and AN are treated as R. |
| 367 | if matches!(previous_strong, BidiClass::EN | BidiClass::AN) { |
| 368 | previous_strong = BidiClass::R; |
| 369 | } |
| 370 | |
| 371 | // > If the preceding strong type is also opposite the embedding direction, |
| 372 | // > context is established, |
| 373 | // > so set the type for both brackets in the pair to that direction. |
| 374 | // AND |
| 375 | // > Otherwise set the type for both brackets in the pair to the embedding direction. |
| 376 | // > Either way it gets set to previous_strong |
| 377 | // |
| 378 | // Both branches amount to setting the type to the strong type. |
| 379 | class_to_set = Some(previous_strong); |
| 380 | } |
| 381 | |
| 382 | if let Some(class_to_set) = class_to_set { |
| 383 | // Update all processing classes corresponding to the start and end elements, as requested. |
| 384 | // We should include all bytes of the character, not the first one. |
| 385 | let end_char_len = |
| 386 | T::char_len(text.subrange(pair.end..text.len()).chars().next().unwrap()); |
| 387 | for class in &mut processing_classes[pair.start..pair.start + start_char_len] { |
| 388 | *class = class_to_set; |
| 389 | } |
| 390 | for class in &mut processing_classes[pair.end..pair.end + end_char_len] { |
| 391 | *class = class_to_set; |
| 392 | } |
| 393 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 394 | for idx in sequence.iter_backwards_from(pair.start, pair.start_run) { |
| 395 | let class = &mut processing_classes[idx]; |
| 396 | if *class != BN { |
| 397 | break; |
| 398 | } |
| 399 | *class = class_to_set; |
| 400 | } |
| 401 | // > Any number of characters that had original bidirectional character type NSM prior to the application of |
| 402 | // > W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match the type of their preceding bracket. |
| 403 | |
| 404 | // This rule deals with sequences of NSMs, so we can just update them all at once, we don't need to worry |
| 405 | // about character boundaries. We do need to be careful to skip the full set of bytes for the parentheses characters. |
| 406 | let nsm_start = pair.start + start_char_len; |
| 407 | for idx in sequence.iter_forwards_from(nsm_start, pair.start_run) { |
| 408 | let class = original_classes[idx]; |
| 409 | if class == BidiClass::NSM || processing_classes[idx] == BN { |
| 410 | processing_classes[idx] = class_to_set; |
| 411 | } else { |
| 412 | break; |
| 413 | } |
| 414 | } |
| 415 | let nsm_end = pair.end + end_char_len; |
| 416 | for idx in sequence.iter_forwards_from(nsm_end, pair.end_run) { |
| 417 | let class = original_classes[idx]; |
| 418 | if class == BidiClass::NSM || processing_classes[idx] == BN { |
| 419 | processing_classes[idx] = class_to_set; |
| 420 | } else { |
| 421 | break; |
| 422 | } |
| 423 | } |
| 424 | } |
| 425 | // > Otherwise, there are no strong types within the bracket pair |
| 426 | // > Therefore, do not set the type for that bracket pair |
| 427 | } |
| 428 | |
| 429 | // N1 and N2. |
| 430 | // Indices of every byte in this isolating run sequence |
| 431 | let mut indices = sequence.runs.iter().flat_map(Clone::clone); |
| 432 | let mut prev_class = sequence.sos; |
| 433 | while let Some(mut i) = indices.next() { |
| 434 | // Process sequences of NI characters. |
| 435 | #[cfg (feature = "smallvec" )] |
| 436 | let mut ni_run = SmallVec::<[usize; 8]>::new(); |
| 437 | #[cfg (not(feature = "smallvec" ))] |
| 438 | let mut ni_run = Vec::new(); |
| 439 | // The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 440 | if is_NI(processing_classes[i]) || processing_classes[i] == BN { |
| 441 | // Consume a run of consecutive NI characters. |
| 442 | ni_run.push(i); |
| 443 | let mut next_class; |
| 444 | loop { |
| 445 | match indices.next() { |
| 446 | Some(j) => { |
| 447 | i = j; |
| 448 | next_class = processing_classes[j]; |
| 449 | // The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> |
| 450 | if is_NI(next_class) || next_class == BN { |
| 451 | ni_run.push(i); |
| 452 | } else { |
| 453 | break; |
| 454 | } |
| 455 | } |
| 456 | None => { |
| 457 | next_class = sequence.eos; |
| 458 | break; |
| 459 | } |
| 460 | }; |
| 461 | } |
| 462 | // N1-N2. |
| 463 | // |
| 464 | // <http://www.unicode.org/reports/tr9/#N1> |
| 465 | // <http://www.unicode.org/reports/tr9/#N2> |
| 466 | let new_class = match (prev_class, next_class) { |
| 467 | (L, L) => L, |
| 468 | (R, R) |
| 469 | | (R, AN) |
| 470 | | (R, EN) |
| 471 | | (AN, R) |
| 472 | | (AN, AN) |
| 473 | | (AN, EN) |
| 474 | | (EN, R) |
| 475 | | (EN, AN) |
| 476 | | (EN, EN) => R, |
| 477 | (_, _) => e, |
| 478 | }; |
| 479 | for j in &ni_run { |
| 480 | processing_classes[*j] = new_class; |
| 481 | } |
| 482 | ni_run.clear(); |
| 483 | } |
| 484 | prev_class = processing_classes[i]; |
| 485 | } |
| 486 | } |
| 487 | |
/// One matched pair of brackets inside an isolating run sequence.
///
/// Both positions point at the bracket characters themselves, and each one is
/// accompanied by the index of the level run it was found in.
struct BracketPair {
    /// The text-relative index of the opening bracket.
    start: usize,
    /// The text-relative index of the closing bracket.
    end: usize,
    /// The index of the run (in the run sequence) that the opening bracket is in.
    start_run: usize,
    /// The index of the run (in the run sequence) that the closing bracket is in.
    end_run: usize,
}
| 498 | /// 3.1.3 Identifying Bracket Pairs |
| 499 | /// |
| 500 | /// Returns all paired brackets in the source, as indices into the |
| 501 | /// text source. |
| 502 | /// |
| 503 | /// <https://www.unicode.org/reports/tr9/#BD16> |
| 504 | fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( |
| 505 | text: &'a T, |
| 506 | data_source: &D, |
| 507 | run_sequence: &IsolatingRunSequence, |
| 508 | original_classes: &[BidiClass], |
| 509 | bracket_pairs: &mut BracketPairVec, |
| 510 | ) { |
| 511 | #[cfg (feature = "smallvec" )] |
| 512 | let mut stack = SmallVec::<[(char, usize, usize); 8]>::new(); |
| 513 | #[cfg (not(feature = "smallvec" ))] |
| 514 | let mut stack = Vec::new(); |
| 515 | |
| 516 | for (run_index, level_run) in run_sequence.runs.iter().enumerate() { |
| 517 | for (i, ch) in text.subrange(level_run.clone()).char_indices() { |
| 518 | let actual_index = level_run.start + i; |
| 519 | |
| 520 | // All paren characters are ON. |
| 521 | // From BidiBrackets.txt: |
| 522 | // > The Unicode property value stability policy guarantees that characters |
| 523 | // > which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y |
| 524 | if original_classes[actual_index] != BidiClass::ON { |
| 525 | continue; |
| 526 | } |
| 527 | |
| 528 | if let Some(matched) = data_source.bidi_matched_opening_bracket(ch) { |
| 529 | if matched.is_open { |
| 530 | // > If an opening paired bracket is found ... |
| 531 | |
| 532 | // > ... and there is no room in the stack, |
| 533 | // > stop processing BD16 for the remainder of the isolating run sequence. |
| 534 | if stack.len() >= 63 { |
| 535 | break; |
| 536 | } |
| 537 | // > ... push its Bidi_Paired_Bracket property value and its text position onto the stack |
| 538 | stack.push((matched.opening, actual_index, run_index)) |
| 539 | } else { |
| 540 | // > If a closing paired bracket is found, do the following |
| 541 | |
| 542 | // > Declare a variable that holds a reference to the current stack element |
| 543 | // > and initialize it with the top element of the stack. |
| 544 | // AND |
| 545 | // > Else, if the current stack element is not at the bottom of the stack |
| 546 | for (stack_index, element) in stack.iter().enumerate().rev() { |
| 547 | // > Compare the closing paired bracket being inspected or its canonical |
| 548 | // > equivalent to the bracket in the current stack element. |
| 549 | if element.0 == matched.opening { |
| 550 | // > If the values match, meaning the two characters form a bracket pair, then |
| 551 | |
| 552 | // > Append the text position in the current stack element together with the |
| 553 | // > text position of the closing paired bracket to the list. |
| 554 | let pair = BracketPair { |
| 555 | start: element.1, |
| 556 | end: actual_index, |
| 557 | start_run: element.2, |
| 558 | end_run: run_index, |
| 559 | }; |
| 560 | bracket_pairs.push(pair); |
| 561 | |
| 562 | // > Pop the stack through the current stack element inclusively. |
| 563 | stack.truncate(stack_index); |
| 564 | break; |
| 565 | } |
| 566 | } |
| 567 | } |
| 568 | } |
| 569 | } |
| 570 | } |
| 571 | // > Sort the list of pairs of text positions in ascending order based on |
| 572 | // > the text position of the opening paired bracket. |
| 573 | bracket_pairs.sort_by_key(|r| r.start); |
| 574 | } |
| 575 | |
| 576 | /// 3.3.6 Resolving Implicit Levels |
| 577 | /// |
| 578 | /// Returns the maximum embedding level in the paragraph. |
| 579 | /// |
| 580 | /// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels> |
| 581 | #[cfg_attr (feature = "flame_it" , flamer::flame)] |
| 582 | pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level { |
| 583 | let mut max_level: Level = Level::ltr(); |
| 584 | assert_eq!(processing_classes.len(), levels.len()); |
| 585 | for i: usize in 0..levels.len() { |
| 586 | match (levels[i].is_rtl(), processing_classes[i]) { |
| 587 | (false, AN) | (false, EN) => levels[i].raise(2).expect(msg:"Level number error" ), |
| 588 | (false, R) | (true, L) | (true, EN) | (true, AN) => { |
| 589 | levels[i].raise(1).expect(msg:"Level number error" ) |
| 590 | } |
| 591 | // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> handled here |
| 592 | (_, _) => {} |
| 593 | } |
| 594 | max_level = max(v1:max_level, v2:levels[i]); |
| 595 | } |
| 596 | |
| 597 | max_level |
| 598 | } |
| 599 | |
| 600 | /// Neutral or Isolate formatting character (B, S, WS, ON, FSI, LRI, RLI, PDI) |
| 601 | /// |
| 602 | /// <http://www.unicode.org/reports/tr9/#NI> |
| 603 | #[allow (non_snake_case)] |
| 604 | fn is_NI(class: BidiClass) -> bool { |
| 605 | matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) |
| 606 | } |
| 607 | |