1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | //! Accessor for `Bidi_Class` property from Unicode Character Database (UCD) |
11 | |
12 | mod tables; |
13 | |
14 | pub use self::tables::{BidiClass, UNICODE_VERSION}; |
15 | #[cfg (feature = "hardcoded-data" )] |
16 | use core::char; |
17 | #[cfg (feature = "hardcoded-data" )] |
18 | use core::cmp::Ordering::{Equal, Greater, Less}; |
19 | |
20 | #[cfg (feature = "hardcoded-data" )] |
21 | use self::tables::bidi_class_table; |
22 | use crate::data_source::BidiMatchedOpeningBracket; |
23 | use crate::BidiClass::*; |
24 | #[cfg (feature = "hardcoded-data" )] |
25 | use crate::BidiDataSource; |
/// Hardcoded Bidi data that ships with the unicode-bidi crate.
///
/// This can be enabled with the default `hardcoded-data` Cargo feature.
///
/// The type is a field-less marker: its `BidiDataSource` implementation
/// answers every query from the tables compiled into this module, so a value
/// carries no state and can be freely copied, defaulted and debug-printed.
#[cfg(feature = "hardcoded-data")]
#[derive(Clone, Copy, Debug, Default)]
pub struct HardcodedBidiData;
31 | |
#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `Bidi_Class` of `c`.
    ///
    /// Forwards to this module's free `bidi_class` function, which performs
    /// the lookup in the compiled-in `bidi_class_table`.
    fn bidi_class(&self, c: char) -> BidiClass {
        bidi_class(c)
    }
}
38 | |
/// Look up the `Bidi_Class` property value of a single `char`.
///
/// The answer comes from the hardcoded `bidi_class_table`; the search itself
/// is done by `bsearch_range_value_table`, which falls back to `L` for code
/// points that no table range covers.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}
44 | |
45 | /// If this character is a bracket according to BidiBrackets.txt, |
46 | /// return the corresponding *normalized* *opening bracket* of the pair, |
47 | /// and whether or not it itself is an opening bracket. |
48 | pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> { |
49 | for pair: &'static (char, char, Option<…>) in self::tables::bidi_pairs_table { |
50 | if pair.0 == c || pair.1 == c { |
51 | let skeleton: char = pair.2.unwrap_or(default:pair.0); |
52 | return Some(BidiMatchedOpeningBracket { |
53 | opening: skeleton, |
54 | is_open: pair.0 == c, |
55 | }); |
56 | } |
57 | } |
58 | None |
59 | } |
60 | |
61 | pub fn is_rtl(bidi_class: BidiClass) -> bool { |
62 | matches!(bidi_class, RLE | RLO | RLI) |
63 | } |
64 | |
/// Binary-search a table of inclusive `(lo, hi, class)` ranges for `c`.
///
/// Returns the class stored with the range containing `c`, or `L` when no
/// range contains it. The use of `binary_search_by` means `r` is expected to
/// be sorted by range with no overlaps; that property is not checked here.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    // Order each range relative to `c`: a range ending before `c` sorts
    // `Less`, one starting after `c` sorts `Greater`, one containing it is
    // the match.
    let found = r.binary_search_by(|&(lo, hi, _)| {
        if hi < c {
            Less
        } else if c < lo {
            Greater
        } else {
            Equal
        }
    });

    match found {
        Ok(idx) => r[idx].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
85 | |
/// Spot checks of `bidi_class` against expected `Bidi_Class` values for
/// selected code points.
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    #[test]
    fn test_ascii() {
        assert_eq!(bidi_class('\u{0000}'), BN);
        assert_eq!(bidi_class('\u{0040}'), ON);
        assert_eq!(bidi_class('\u{0041}'), L);
        assert_eq!(bidi_class('\u{0062}'), L);
        assert_eq!(bidi_class('\u{007F}'), BN);
    }

    #[test]
    fn test_bmp() {
        // Hebrew
        assert_eq!(bidi_class('\u{0590}'), R);
        assert_eq!(bidi_class('\u{05D0}'), R);
        assert_eq!(bidi_class('\u{05D1}'), R);
        assert_eq!(bidi_class('\u{05FF}'), R);

        // Arabic
        assert_eq!(bidi_class('\u{0600}'), AN);
        assert_eq!(bidi_class('\u{0627}'), AL);
        assert_eq!(bidi_class('\u{07BF}'), AL);

        // Default R + Arabic Extras
        assert_eq!(bidi_class('\u{07C0}'), R);
        assert_eq!(bidi_class('\u{085F}'), R);
        assert_eq!(bidi_class('\u{0860}'), AL);
        assert_eq!(bidi_class('\u{0870}'), AL);
        assert_eq!(bidi_class('\u{089F}'), NSM);
        assert_eq!(bidi_class('\u{08A0}'), AL);
        assert_eq!(bidi_class('\u{08FF}'), NSM);

        // Default ET
        assert_eq!(bidi_class('\u{20A0}'), ET);
        assert_eq!(bidi_class('\u{20CF}'), ET);

        // Arabic Presentation Forms
        assert_eq!(bidi_class('\u{FB1D}'), R);
        assert_eq!(bidi_class('\u{FB4F}'), R);
        assert_eq!(bidi_class('\u{FB50}'), AL);
        assert_eq!(bidi_class('\u{FDCF}'), ON);
        assert_eq!(bidi_class('\u{FDF0}'), AL);
        assert_eq!(bidi_class('\u{FDFF}'), ON);
        assert_eq!(bidi_class('\u{FE70}'), AL);
        assert_eq!(bidi_class('\u{FEFE}'), AL);
        assert_eq!(bidi_class('\u{FEFF}'), BN);

        // noncharacters
        assert_eq!(bidi_class('\u{FDD0}'), L);
        assert_eq!(bidi_class('\u{FDD1}'), L);
        assert_eq!(bidi_class('\u{FDEE}'), L);
        assert_eq!(bidi_class('\u{FDEF}'), L);
        assert_eq!(bidi_class('\u{FFFE}'), L);
        assert_eq!(bidi_class('\u{FFFF}'), L);
    }

    #[test]
    fn test_smp() {
        // Default AL + R
        assert_eq!(bidi_class('\u{10800}'), R);
        assert_eq!(bidi_class('\u{10FFF}'), R);
        assert_eq!(bidi_class('\u{1E800}'), R);
        assert_eq!(bidi_class('\u{1EDFF}'), R);
        assert_eq!(bidi_class('\u{1EE00}'), AL);
        assert_eq!(bidi_class('\u{1EEFF}'), AL);
        assert_eq!(bidi_class('\u{1EF00}'), R);
        assert_eq!(bidi_class('\u{1EFFF}'), R);
    }

    #[test]
    fn test_unassigned_planes() {
        assert_eq!(bidi_class('\u{30000}'), L);
        assert_eq!(bidi_class('\u{40000}'), L);
        assert_eq!(bidi_class('\u{50000}'), L);
        assert_eq!(bidi_class('\u{60000}'), L);
        assert_eq!(bidi_class('\u{70000}'), L);
        assert_eq!(bidi_class('\u{80000}'), L);
        assert_eq!(bidi_class('\u{90000}'), L);
        assert_eq!(bidi_class('\u{a0000}'), L);
    }
}
171 | |