1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! 3.3.2 Explicit Levels and Directions
11//!
12//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14use alloc::vec::Vec;
15
16use super::char_data::{
17 is_rtl,
18 BidiClass::{self, *},
19};
20use super::level::Level;
21use super::TextSource;
22
23/// Compute explicit embedding levels for one paragraph of text (X1-X8).
24///
25/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
26/// for each char in `text`.
27#[cfg_attr(feature = "flame_it", flamer::flame)]
28pub fn compute<'a, T: TextSource<'a> + ?Sized>(
29 text: &'a T,
30 para_level: Level,
31 original_classes: &[BidiClass],
32 levels: &mut [Level],
33 processing_classes: &mut [BidiClass],
34) {
35 assert_eq!(text.len(), original_classes.len());
36
37 // <http://www.unicode.org/reports/tr9/#X1>
38 let mut stack = DirectionalStatusStack::new();
39 stack.push(para_level, OverrideStatus::Neutral);
40
41 let mut overflow_isolate_count = 0u32;
42 let mut overflow_embedding_count = 0u32;
43 let mut valid_isolate_count = 0u32;
44
45 for (i, len) in text.indices_lengths() {
46 match original_classes[i] {
47 // Rules X2-X5c
48 RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
49 let last_level = stack.last().level;
50
51 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
52 levels[i] = last_level;
53
54 // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
55 let is_isolate = match original_classes[i] {
56 RLI | LRI | FSI => true,
57 _ => false,
58 };
59 if is_isolate {
60 // Redundant due to "Retaining explicit formatting characters" step.
61 // levels[i] = last_level;
62 match stack.last().status {
63 OverrideStatus::RTL => processing_classes[i] = R,
64 OverrideStatus::LTR => processing_classes[i] = L,
65 _ => {}
66 }
67 }
68
69 let new_level = if is_rtl(original_classes[i]) {
70 last_level.new_explicit_next_rtl()
71 } else {
72 last_level.new_explicit_next_ltr()
73 };
74 if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
75 {
76 let new_level = new_level.unwrap();
77 stack.push(
78 new_level,
79 match original_classes[i] {
80 RLO => OverrideStatus::RTL,
81 LRO => OverrideStatus::LTR,
82 RLI | LRI | FSI => OverrideStatus::Isolate,
83 _ => OverrideStatus::Neutral,
84 },
85 );
86 if is_isolate {
87 valid_isolate_count += 1;
88 } else {
89 // The spec doesn't explicitly mention this step, but it is necessary.
90 // See the reference implementations for comparison.
91 levels[i] = new_level;
92 }
93 } else if is_isolate {
94 overflow_isolate_count += 1;
95 } else if overflow_isolate_count == 0 {
96 overflow_embedding_count += 1;
97 }
98
99 if !is_isolate {
100 // X9 +
101 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
102 // (PDF handled below)
103 processing_classes[i] = BN;
104 }
105 }
106
107 // <http://www.unicode.org/reports/tr9/#X6a>
108 PDI => {
109 if overflow_isolate_count > 0 {
110 overflow_isolate_count -= 1;
111 } else if valid_isolate_count > 0 {
112 overflow_embedding_count = 0;
113 loop {
114 // Pop everything up to and including the last Isolate status.
115 match stack.vec.pop() {
116 None
117 | Some(Status {
118 status: OverrideStatus::Isolate,
119 ..
120 }) => break,
121 _ => continue,
122 }
123 }
124 valid_isolate_count -= 1;
125 }
126 let last = stack.last();
127 levels[i] = last.level;
128 match last.status {
129 OverrideStatus::RTL => processing_classes[i] = R,
130 OverrideStatus::LTR => processing_classes[i] = L,
131 _ => {}
132 }
133 }
134
135 // <http://www.unicode.org/reports/tr9/#X7>
136 PDF => {
137 if overflow_isolate_count > 0 {
138 // do nothing
139 } else if overflow_embedding_count > 0 {
140 overflow_embedding_count -= 1;
141 } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
142 stack.vec.pop();
143 }
144 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
145 levels[i] = stack.last().level;
146 // X9 part of retaining explicit formatting characters.
147 processing_classes[i] = BN;
148 }
149
150 // Nothing.
151 // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
152 B => {}
153
154 // <http://www.unicode.org/reports/tr9/#X6>
155 _ => {
156 let last = stack.last();
157 levels[i] = last.level;
158 // This condition is not in the spec, but I am pretty sure that is a spec bug.
159 // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
160 if original_classes[i] != BN {
161 match last.status {
162 OverrideStatus::RTL => processing_classes[i] = R,
163 OverrideStatus::LTR => processing_classes[i] = L,
164 _ => {}
165 }
166 }
167 }
168 }
169
170 // Handle multi-byte characters.
171 for j in 1..len {
172 levels[i + j] = levels[i];
173 processing_classes[i + j] = processing_classes[i];
174 }
175 }
176}
177
178/// Entries in the directional status stack:
179struct Status {
180 level: Level,
181 status: OverrideStatus,
182}
183
/// Directional override / isolate status stored with each directional status stack entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No override is in effect (the paragraph-level entry and plain embeddings).
    Neutral,
    /// A right-to-left override is in effect: covered characters are treated as `R`.
    RTL,
    /// A left-to-right override is in effect: covered characters are treated as `L`.
    LTR,
    /// The entry was opened by an isolate initiator; no override is in effect.
    Isolate,
}
191
192struct DirectionalStatusStack {
193 vec: Vec<Status>,
194}
195
196impl DirectionalStatusStack {
197 fn new() -> Self {
198 DirectionalStatusStack {
199 vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
200 }
201 }
202
203 fn push(&mut self, level: Level, status: OverrideStatus) {
204 self.vec.push(Status { level, status });
205 }
206
207 fn last(&self) -> &Status {
208 self.vec.last().unwrap()
209 }
210}
211