// Copyright 2019 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10 | |
//! Lookups of Unicode properties using minimal perfect hashing.
12 | |
13 | use crate::perfect_hash::mph_lookup; |
14 | use crate::tables::*; |
15 | |
16 | /// Look up the canonical combining class for a codepoint. |
17 | /// |
18 | /// The value returned is as defined in the Unicode Character Database. |
19 | pub fn canonical_combining_class(c: char) -> u8 { |
20 | mph_lookup( |
21 | x:c.into(), |
22 | CANONICAL_COMBINING_CLASS_SALT, |
23 | CANONICAL_COMBINING_CLASS_KV, |
24 | u8_lookup_fk, |
25 | u8_lookup_fv, |
26 | default:0, |
27 | ) |
28 | } |
29 | |
30 | pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> { |
31 | if c1 < ' \u{10000}' && c2 < ' \u{10000}' { |
32 | mph_lookup( |
33 | (c1 as u32) << 16 | (c2 as u32), |
34 | COMPOSITION_TABLE_SALT, |
35 | COMPOSITION_TABLE_KV, |
36 | pair_lookup_fk, |
37 | fv:pair_lookup_fv_opt, |
38 | default:None, |
39 | ) |
40 | } else { |
41 | composition_table_astral(c1, c2) |
42 | } |
43 | } |
44 | |
45 | pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> { |
46 | mph_lookupOption<(u16, u16)>( |
47 | x:c.into(), |
48 | CANONICAL_DECOMPOSED_SALT, |
49 | CANONICAL_DECOMPOSED_KV, |
50 | pair_lookup_fk, |
51 | fv:pair_lookup_fv_opt, |
52 | default:None, |
53 | ) |
54 | .map(|(start: u16, len: u16)| &CANONICAL_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
55 | } |
56 | |
57 | pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> { |
58 | mph_lookupOption<(u16, u16)>( |
59 | x:c.into(), |
60 | COMPATIBILITY_DECOMPOSED_SALT, |
61 | COMPATIBILITY_DECOMPOSED_KV, |
62 | pair_lookup_fk, |
63 | fv:pair_lookup_fv_opt, |
64 | default:None, |
65 | ) |
66 | .map(|(start: u16, len: u16)| &COMPATIBILITY_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
67 | } |
68 | |
69 | pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [char]> { |
70 | mph_lookupOption<(u16, u16)>( |
71 | x:c.into(), |
72 | CJK_COMPAT_VARIANTS_DECOMPOSED_SALT, |
73 | CJK_COMPAT_VARIANTS_DECOMPOSED_KV, |
74 | pair_lookup_fk, |
75 | fv:pair_lookup_fv_opt, |
76 | default:None, |
77 | ) |
78 | .map(|(start: u16, len: u16)| &CJK_COMPAT_VARIANTS_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
79 | } |
80 | |
81 | /// Return whether the given character is a combining mark (`General_Category=Mark`) |
82 | pub fn is_combining_mark(c: char) -> bool { |
83 | mph_lookup( |
84 | x:c.into(), |
85 | COMBINING_MARK_SALT, |
86 | COMBINING_MARK_KV, |
87 | bool_lookup_fk, |
88 | bool_lookup_fv, |
89 | default:false, |
90 | ) |
91 | } |
92 | |
93 | pub fn stream_safe_trailing_nonstarters(c: char) -> usize { |
94 | mph_lookup( |
95 | x:c.into(), |
96 | TRAILING_NONSTARTERS_SALT, |
97 | TRAILING_NONSTARTERS_KV, |
98 | u8_lookup_fk, |
99 | u8_lookup_fv, |
100 | default:0, |
101 | ) as usize |
102 | } |
103 | |
/// Key extractor for entries that pack a 24-bit key and an 8-bit value into
/// one `u32`: the key occupies the upper 24 bits.
#[inline]
fn u8_lookup_fk(kv: u32) -> u32 {
    // The value lives in the low byte; shifting it out leaves only the key.
    const VALUE_BITS: u32 = 8;
    kv >> VALUE_BITS
}
109 | |
/// Value extractor for entries that pack a 24-bit key and an 8-bit value into
/// one `u32`: the value occupies the lowest 8 bits.
#[inline]
fn u8_lookup_fv(kv: u32) -> u8 {
    // Mask off the key bits so the narrowing cast is visibly lossless.
    const VALUE_MASK: u32 = 0xff;
    let value = kv & VALUE_MASK;
    value as u8
}
115 | |
/// Key extractor for boolean (set-membership) tables.
///
/// Entries in such a table carry no separate value, so the whole entry is
/// the key and is returned unchanged.
#[inline]
fn bool_lookup_fk(kv: u32) -> u32 {
    let key = kv;
    key
}
121 | |
/// Value extractor for boolean (set-membership) tables.
///
/// The entry itself is ignored: every entry maps to `true`.
#[inline]
fn bool_lookup_fv(_kv: u32) -> bool {
    const PRESENT: bool = true;
    PRESENT
}
127 | |
/// Key extractor for `(key, value)` tuple entries: returns the first element.
#[inline]
fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 {
    let (key, _) = kv;
    key
}
133 | |
/// Value extractor for `(key, value)` tuple entries: returns the second
/// element wrapped in `Some`.
#[inline]
fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> {
    let (_, value) = kv;
    Some(value)
}
139 | |