1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
//! Accessors for the `Bidi_Class` property from the Unicode Character Database (UCD)
11 | |
12 | mod tables; |
13 | |
14 | pub use self::tables::{BidiClass, UNICODE_VERSION}; |
15 | #[cfg (feature = "hardcoded-data" )] |
16 | use core::char; |
17 | #[cfg (feature = "hardcoded-data" )] |
18 | use core::cmp::Ordering::{Equal, Greater, Less}; |
19 | |
20 | #[cfg (feature = "hardcoded-data" )] |
21 | use self::tables::bidi_class_table; |
22 | use crate::data_source::BidiMatchedOpeningBracket; |
23 | use crate::BidiClass::*; |
24 | #[cfg (feature = "hardcoded-data" )] |
25 | use crate::BidiDataSource; |
/// Bidi character data compiled directly into the unicode-bidi crate.
///
/// Only available when the default `hardcoded-data` Cargo feature is
/// enabled.
#[cfg(feature = "hardcoded-data")]
pub struct HardcodedBidiData;

#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `Bidi_Class` of `c`, using the same built-in table lookup
    /// as the free function `bidi_class` in this module.
    fn bidi_class(&self, c: char) -> BidiClass {
        // `self::` names the module-level function explicitly; this is not a
        // recursive call to the trait method.
        self::bidi_class(c)
    }
}
38 | |
/// Look up the `Bidi_Class` property value of a single `char` in the
/// built-in range table.
///
/// Code points that no table range covers are reported as `L`.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}
44 | |
45 | /// If this character is a bracket according to BidiBrackets.txt, |
46 | /// return the corresponding *normalized* *opening bracket* of the pair, |
47 | /// and whether or not it itself is an opening bracket. |
48 | pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> { |
49 | for pair: &(char, char, Option) in self::tables::bidi_pairs_table { |
50 | if pair.0 == c || pair.1 == c { |
51 | let skeleton: char = pair.2.unwrap_or(default:pair.0); |
52 | return Some(BidiMatchedOpeningBracket { |
53 | opening: skeleton, |
54 | is_open: pair.0 == c, |
55 | }); |
56 | } |
57 | } |
58 | None |
59 | } |
60 | |
61 | pub fn is_rtl(bidi_class: BidiClass) -> bool { |
62 | match bidi_class { |
63 | RLE | RLO | RLI => true, |
64 | _ => false, |
65 | } |
66 | } |
67 | |
/// Binary-search a table of inclusive `(lo, hi, class)` ranges for the range
/// containing `c` and return that range's class.
///
/// The binary search requires `r` to be sorted by code point with
/// non-overlapping ranges. When no range contains `c`, the result is `L`.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    let found = r.binary_search_by(|&(lo, hi, _)| {
        if hi < c {
            // The whole range lies below `c`.
            Less
        } else if lo > c {
            // The whole range lies above `c`.
            Greater
        } else {
            // lo <= c <= hi
            Equal
        }
    });

    match found {
        Ok(idx) => r[idx].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
88 | |
/// Spot checks of the hardcoded `Bidi_Class` table against known values at
/// block boundaries and in default-value ranges.
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    #[test]
    fn test_ascii() {
        assert_eq!(bidi_class('\u{0000}'), BN);
        assert_eq!(bidi_class('\u{0040}'), ON);
        assert_eq!(bidi_class('\u{0041}'), L);
        assert_eq!(bidi_class('\u{0062}'), L);
        assert_eq!(bidi_class('\u{007F}'), BN);
    }

    #[test]
    fn test_bmp() {
        // Hebrew
        assert_eq!(bidi_class('\u{0590}'), R);
        assert_eq!(bidi_class('\u{05D0}'), R);
        assert_eq!(bidi_class('\u{05D1}'), R);
        assert_eq!(bidi_class('\u{05FF}'), R);

        // Arabic
        assert_eq!(bidi_class('\u{0600}'), AN);
        assert_eq!(bidi_class('\u{0627}'), AL);
        assert_eq!(bidi_class('\u{07BF}'), AL);

        // Default R + Arabic Extras
        assert_eq!(bidi_class('\u{07C0}'), R);
        assert_eq!(bidi_class('\u{085F}'), R);
        assert_eq!(bidi_class('\u{0860}'), AL);
        assert_eq!(bidi_class('\u{0870}'), AL);
        assert_eq!(bidi_class('\u{089F}'), NSM);
        assert_eq!(bidi_class('\u{08A0}'), AL);
        assert_eq!(bidi_class('\u{08FF}'), NSM);

        // Default ET
        assert_eq!(bidi_class('\u{20A0}'), ET);
        assert_eq!(bidi_class('\u{20CF}'), ET);

        // Arabic Presentation Forms
        assert_eq!(bidi_class('\u{FB1D}'), R);
        assert_eq!(bidi_class('\u{FB4F}'), R);
        assert_eq!(bidi_class('\u{FB50}'), AL);
        assert_eq!(bidi_class('\u{FDCF}'), ON);
        assert_eq!(bidi_class('\u{FDF0}'), AL);
        assert_eq!(bidi_class('\u{FDFF}'), ON);
        assert_eq!(bidi_class('\u{FE70}'), AL);
        assert_eq!(bidi_class('\u{FEFE}'), AL);
        assert_eq!(bidi_class('\u{FEFF}'), BN);

        // noncharacters
        assert_eq!(bidi_class('\u{FDD0}'), L);
        assert_eq!(bidi_class('\u{FDD1}'), L);
        assert_eq!(bidi_class('\u{FDEE}'), L);
        assert_eq!(bidi_class('\u{FDEF}'), L);
        assert_eq!(bidi_class('\u{FFFE}'), L);
        assert_eq!(bidi_class('\u{FFFF}'), L);
    }

    #[test]
    fn test_smp() {
        // Default AL + R
        assert_eq!(bidi_class('\u{10800}'), R);
        assert_eq!(bidi_class('\u{10FFF}'), R);
        assert_eq!(bidi_class('\u{1E800}'), R);
        assert_eq!(bidi_class('\u{1EDFF}'), R);
        assert_eq!(bidi_class('\u{1EE00}'), AL);
        assert_eq!(bidi_class('\u{1EEFF}'), AL);
        assert_eq!(bidi_class('\u{1EF00}'), R);
        assert_eq!(bidi_class('\u{1EFFF}'), R);
    }

    #[test]
    fn test_unassigned_planes() {
        assert_eq!(bidi_class('\u{30000}'), L);
        assert_eq!(bidi_class('\u{40000}'), L);
        assert_eq!(bidi_class('\u{50000}'), L);
        assert_eq!(bidi_class('\u{60000}'), L);
        assert_eq!(bidi_class('\u{70000}'), L);
        assert_eq!(bidi_class('\u{80000}'), L);
        assert_eq!(bidi_class('\u{90000}'), L);
        assert_eq!(bidi_class('\u{a0000}'), L);
    }
}
174 | |