// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)
| 11 | |
| 12 | mod tables; |
| 13 | |
| 14 | pub use self::tables::{BidiClass, UNICODE_VERSION}; |
| 15 | #[cfg (feature = "hardcoded-data" )] |
| 16 | use core::char; |
| 17 | #[cfg (feature = "hardcoded-data" )] |
| 18 | use core::cmp::Ordering::{Equal, Greater, Less}; |
| 19 | |
| 20 | #[cfg (feature = "hardcoded-data" )] |
| 21 | use self::tables::bidi_class_table; |
| 22 | use crate::data_source::BidiMatchedOpeningBracket; |
| 23 | use crate::BidiClass::*; |
| 24 | #[cfg (feature = "hardcoded-data" )] |
| 25 | use crate::BidiDataSource; |
/// Hardcoded Bidi data that ships with the unicode-bidi crate.
///
/// This can be enabled with the default `hardcoded-data` Cargo feature.
///
/// This is a field-less unit struct: it carries no state of its own and
/// only serves as a handle for the `BidiDataSource` implementation.
#[cfg(feature = "hardcoded-data")]
pub struct HardcodedBidiData;
| 31 | |
#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `BidiClass` of `c` by binary-searching the hardcoded
    /// `bidi_class_table`.
    fn bidi_class(&self, c: char) -> BidiClass {
        bsearch_range_value_table(c, bidi_class_table)
    }
}
| 38 | |
/// Find the `BidiClass` of a single char.
///
/// The lookup is a binary search over the hardcoded `bidi_class_table`;
/// characters not covered by any range in the table are reported as `L`.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    bsearch_range_value_table(c, bidi_class_table)
}
| 44 | |
| 45 | /// If this character is a bracket according to BidiBrackets.txt, |
| 46 | /// return the corresponding *normalized* *opening bracket* of the pair, |
| 47 | /// and whether or not it itself is an opening bracket. |
| 48 | pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> { |
| 49 | for pair: &'static (char, char, Option<…>) in self::tables::bidi_pairs_table { |
| 50 | if pair.0 == c || pair.1 == c { |
| 51 | let skeleton: char = pair.2.unwrap_or(default:pair.0); |
| 52 | return Some(BidiMatchedOpeningBracket { |
| 53 | opening: skeleton, |
| 54 | is_open: pair.0 == c, |
| 55 | }); |
| 56 | } |
| 57 | } |
| 58 | None |
| 59 | } |
| 60 | |
| 61 | pub fn is_rtl(bidi_class: BidiClass) -> bool { |
| 62 | matches!(bidi_class, RLE | RLO | RLI) |
| 63 | } |
| 64 | |
/// Looks up `c` in a table of inclusive `(lo, hi, class)` ranges using
/// binary search and returns the class of the range that contains it.
///
/// The binary search is only meaningful if `r` is sorted by range and the
/// ranges do not overlap; this function does not verify that.
///
/// Returns `L` when no range in `r` contains `c`.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    match r.binary_search_by(|&(lo, hi, _)| {
        if lo <= c && c <= hi {
            // `c` lies inside this range: found.
            Equal
        } else if hi < c {
            // This range ends before `c`: continue in the upper half.
            Less
        } else {
            // This range starts after `c`: continue in the lower half.
            Greater
        }
    }) {
        Ok(idx) => {
            let (_, _, cat) = r[idx];
            cat
        }
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
| 85 | |
/// Spot checks of `bidi_class` against the hardcoded table, grouped by
/// code point region.
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    #[test]
    fn test_ascii() {
        assert_eq!(bidi_class('\u{0000}'), BN);
        assert_eq!(bidi_class('\u{0040}'), ON);
        assert_eq!(bidi_class('\u{0041}'), L);
        assert_eq!(bidi_class('\u{0062}'), L);
        assert_eq!(bidi_class('\u{007F}'), BN);
    }

    #[test]
    fn test_bmp() {
        // Hebrew
        assert_eq!(bidi_class('\u{0590}'), R);
        assert_eq!(bidi_class('\u{05D0}'), R);
        assert_eq!(bidi_class('\u{05D1}'), R);
        assert_eq!(bidi_class('\u{05FF}'), R);

        // Arabic
        assert_eq!(bidi_class('\u{0600}'), AN);
        assert_eq!(bidi_class('\u{0627}'), AL);
        assert_eq!(bidi_class('\u{07BF}'), AL);

        // Default R + Arabic Extras
        assert_eq!(bidi_class('\u{07C0}'), R);
        assert_eq!(bidi_class('\u{085F}'), R);
        assert_eq!(bidi_class('\u{0860}'), AL);
        assert_eq!(bidi_class('\u{0870}'), AL);
        assert_eq!(bidi_class('\u{089F}'), NSM);
        assert_eq!(bidi_class('\u{08A0}'), AL);
        assert_eq!(bidi_class('\u{08FF}'), NSM);

        // Default ET
        assert_eq!(bidi_class('\u{20A0}'), ET);
        assert_eq!(bidi_class('\u{20CF}'), ET);

        // Arabic Presentation Forms
        assert_eq!(bidi_class('\u{FB1D}'), R);
        assert_eq!(bidi_class('\u{FB4F}'), R);
        assert_eq!(bidi_class('\u{FB50}'), AL);
        assert_eq!(bidi_class('\u{FDCF}'), ON);
        assert_eq!(bidi_class('\u{FDF0}'), AL);
        assert_eq!(bidi_class('\u{FDFF}'), ON);
        assert_eq!(bidi_class('\u{FE70}'), AL);
        assert_eq!(bidi_class('\u{FEFE}'), AL);
        assert_eq!(bidi_class('\u{FEFF}'), BN);

        // noncharacters
        assert_eq!(bidi_class('\u{FDD0}'), L);
        assert_eq!(bidi_class('\u{FDD1}'), L);
        assert_eq!(bidi_class('\u{FDEE}'), L);
        assert_eq!(bidi_class('\u{FDEF}'), L);
        assert_eq!(bidi_class('\u{FFFE}'), L);
        assert_eq!(bidi_class('\u{FFFF}'), L);
    }

    #[test]
    fn test_smp() {
        // Default AL + R
        assert_eq!(bidi_class('\u{10800}'), R);
        assert_eq!(bidi_class('\u{10FFF}'), R);
        assert_eq!(bidi_class('\u{1E800}'), R);
        assert_eq!(bidi_class('\u{1EDFF}'), R);
        assert_eq!(bidi_class('\u{1EE00}'), AL);
        assert_eq!(bidi_class('\u{1EEFF}'), AL);
        assert_eq!(bidi_class('\u{1EF00}'), R);
        assert_eq!(bidi_class('\u{1EFFF}'), R);
    }

    #[test]
    fn test_unassigned_planes() {
        assert_eq!(bidi_class('\u{30000}'), L);
        assert_eq!(bidi_class('\u{40000}'), L);
        assert_eq!(bidi_class('\u{50000}'), L);
        assert_eq!(bidi_class('\u{60000}'), L);
        assert_eq!(bidi_class('\u{70000}'), L);
        assert_eq!(bidi_class('\u{80000}'), L);
        assert_eq!(bidi_class('\u{90000}'), L);
        assert_eq!(bidi_class('\u{a0000}'), L);
    }
}
| 171 | |