1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! 3.3.2 Explicit Levels and Directions
11//!
12//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14use alloc::vec::Vec;
15
16use super::char_data::{
17 is_rtl,
18 BidiClass::{self, *},
19};
20use super::level::Level;
21
22/// Compute explicit embedding levels for one paragraph of text (X1-X8).
23///
24/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
25/// for each char in `text`.
26#[cfg_attr(feature = "flame_it", flamer::flame)]
27pub fn compute(
28 text: &str,
29 para_level: Level,
30 original_classes: &[BidiClass],
31 levels: &mut [Level],
32 processing_classes: &mut [BidiClass],
33) {
34 assert_eq!(text.len(), original_classes.len());
35
36 // <http://www.unicode.org/reports/tr9/#X1>
37 let mut stack = DirectionalStatusStack::new();
38 stack.push(para_level, OverrideStatus::Neutral);
39
40 let mut overflow_isolate_count = 0u32;
41 let mut overflow_embedding_count = 0u32;
42 let mut valid_isolate_count = 0u32;
43
44 for (i, c) in text.char_indices() {
45 match original_classes[i] {
46 // Rules X2-X5c
47 RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
48 let last_level = stack.last().level;
49
50 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
51 levels[i] = last_level;
52
53 // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
54 let is_isolate = match original_classes[i] {
55 RLI | LRI | FSI => true,
56 _ => false,
57 };
58 if is_isolate {
59 // Redundant due to "Retaining explicit formatting characters" step.
60 // levels[i] = last_level;
61 match stack.last().status {
62 OverrideStatus::RTL => processing_classes[i] = R,
63 OverrideStatus::LTR => processing_classes[i] = L,
64 _ => {}
65 }
66 }
67
68 let new_level = if is_rtl(original_classes[i]) {
69 last_level.new_explicit_next_rtl()
70 } else {
71 last_level.new_explicit_next_ltr()
72 };
73 if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
74 {
75 let new_level = new_level.unwrap();
76 stack.push(
77 new_level,
78 match original_classes[i] {
79 RLO => OverrideStatus::RTL,
80 LRO => OverrideStatus::LTR,
81 RLI | LRI | FSI => OverrideStatus::Isolate,
82 _ => OverrideStatus::Neutral,
83 },
84 );
85 if is_isolate {
86 valid_isolate_count += 1;
87 } else {
88 // The spec doesn't explicitly mention this step, but it is necessary.
89 // See the reference implementations for comparison.
90 levels[i] = new_level;
91 }
92 } else if is_isolate {
93 overflow_isolate_count += 1;
94 } else if overflow_isolate_count == 0 {
95 overflow_embedding_count += 1;
96 }
97
98 if !is_isolate {
99 // X9 +
100 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
101 // (PDF handled below)
102 processing_classes[i] = BN;
103 }
104 }
105
106 // <http://www.unicode.org/reports/tr9/#X6a>
107 PDI => {
108 if overflow_isolate_count > 0 {
109 overflow_isolate_count -= 1;
110 } else if valid_isolate_count > 0 {
111 overflow_embedding_count = 0;
112 loop {
113 // Pop everything up to and including the last Isolate status.
114 match stack.vec.pop() {
115 None
116 | Some(Status {
117 status: OverrideStatus::Isolate,
118 ..
119 }) => break,
120 _ => continue,
121 }
122 }
123 valid_isolate_count -= 1;
124 }
125 let last = stack.last();
126 levels[i] = last.level;
127 match last.status {
128 OverrideStatus::RTL => processing_classes[i] = R,
129 OverrideStatus::LTR => processing_classes[i] = L,
130 _ => {}
131 }
132 }
133
134 // <http://www.unicode.org/reports/tr9/#X7>
135 PDF => {
136 if overflow_isolate_count > 0 {
137 // do nothing
138 } else if overflow_embedding_count > 0 {
139 overflow_embedding_count -= 1;
140 } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
141 stack.vec.pop();
142 }
143 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
144 levels[i] = stack.last().level;
145 // X9 part of retaining explicit formatting characters.
146 processing_classes[i] = BN;
147 }
148
149 // Nothing.
150 // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
151 B => {}
152
153 // <http://www.unicode.org/reports/tr9/#X6>
154 _ => {
155 let last = stack.last();
156 levels[i] = last.level;
157 // This condition is not in the spec, but I am pretty sure that is a spec bug.
158 // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
159 if original_classes[i] != BN {
160 match last.status {
161 OverrideStatus::RTL => processing_classes[i] = R,
162 OverrideStatus::LTR => processing_classes[i] = L,
163 _ => {}
164 }
165 }
166 }
167 }
168
169 // Handle multi-byte characters.
170 for j in 1..c.len_utf8() {
171 levels[i + j] = levels[i];
172 processing_classes[i + j] = processing_classes[i];
173 }
174 }
175}
176
177/// Entries in the directional status stack:
178struct Status {
179 level: Level,
180 status: OverrideStatus,
181}
182
/// Kind of a directional status stack entry.
///
/// The enum carries no data, so it is cheap to copy and compare; `Debug` is derived so values
/// can appear in assertions and diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No directional override is in effect.
    Neutral,
    /// Right-to-left override.
    RTL,
    /// Left-to-right override.
    LTR,
    /// Entry pushed by an isolate initiator.
    Isolate,
}
190
191struct DirectionalStatusStack {
192 vec: Vec<Status>,
193}
194
195impl DirectionalStatusStack {
196 fn new() -> Self {
197 DirectionalStatusStack {
198 vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
199 }
200 }
201
202 fn push(&mut self, level: Level, status: OverrideStatus) {
203 self.vec.push(Status { level, status });
204 }
205
206 fn last(&self) -> &Status {
207 self.vec.last().unwrap()
208 }
209}
210