1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! Query character Unicode properties according to |
12 | //! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/) |
13 | //! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/) |
14 | //! rules. |
15 | //! |
16 | //! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`. |
17 | //! |
18 | //! Future properties can be added as requested. |
19 | //! |
20 | //! ```rust |
21 | //! use unicode_properties::UnicodeEmoji; |
22 | //! use unicode_properties::UnicodeGeneralCategory; |
23 | //! |
//! fn main() {
//!     let ch = '🦀'; // U+1F980 CRAB
//!     let is_emoji = ch.is_emoji_char();
//!     let group = ch.general_category_group();
//!     println!("{}({:?})", ch, group);
//!     println!("The above char {} for use as emoji char.",
//!              if is_emoji { "is recommended" } else { "is not recommended" });
//! }
32 | //! ``` |
33 | //! |
34 | //! # Features |
35 | //! |
36 | //! ## `general-category` |
37 | //! |
38 | //! Provides the most general classification of a character, |
39 | //! based on its primary characteristic. |
40 | //! |
41 | //! ## `emoji` |
42 | //! |
43 | //! Provides the emoji character properties of a character. |
44 | //! |
45 | #![deny (missing_docs)] |
46 | |
47 | #[rustfmt::skip] |
48 | mod tables; |
49 | |
50 | #[cfg (feature = "emoji" )] |
51 | /// Query the emoji character properties of a character. |
52 | pub mod emoji { |
53 | pub use crate::tables::emoji::EmojiStatus; |
54 | |
55 | /// Query the emoji character properties of a character. |
56 | pub trait UnicodeEmoji: Sized { |
57 | /// Returns the emoji character properties in a status enum. |
58 | fn emoji_status(self) -> EmojiStatus; |
59 | |
60 | /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`. |
61 | fn is_emoji_char(self) -> bool { |
62 | crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status()) |
63 | } |
64 | |
65 | /// Checks whether this character are used in emoji sequences where they're not |
66 | /// intended for independent, direct input, i.e. `Emoji_Component=YES`. |
67 | fn is_emoji_component(self) -> bool { |
68 | crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status()) |
69 | } |
70 | |
71 | /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES` |
72 | fn is_emoji_char_or_emoji_component(self) -> bool { |
73 | crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component( |
74 | self.emoji_status(), |
75 | ) |
76 | } |
77 | } |
78 | |
79 | impl UnicodeEmoji for char { |
80 | fn emoji_status(self) -> EmojiStatus { |
81 | crate::tables::emoji::emoji_status(self) |
82 | } |
83 | } |
84 | |
/// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
///
/// It can be used between the elements of a sequence of characters to indicate that
/// a single glyph should be presented if available.
///
/// Returns `true` only for U+200D; every other character yields `false`.
#[inline]
pub fn is_zwj(c: char) -> bool {
    c == '\u{200D}'
}
93 | |
/// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
/// request an emoji presentation for an emoji character.
///
/// Returns `true` only for U+FE0F; every other character yields `false`.
#[inline]
pub fn is_emoji_presentation_selector(c: char) -> bool {
    c == '\u{FE0F}'
}
100 | |
/// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
/// request a text presentation for an emoji character.
///
/// Returns `true` only for U+FE0E; every other character yields `false`.
#[inline]
pub fn is_text_presentation_selector(c: char) -> bool {
    c == '\u{FE0E}'
}
107 | |
/// Checks whether this character is one of the Regional Indicator characters.
///
/// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
///
/// Returns `true` for characters in the inclusive range U+1F1E6..=U+1F1FF.
#[inline]
pub fn is_regional_indicator(c: char) -> bool {
    matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
}
115 | |
/// Checks whether this character is one of the Tag Characters.
///
/// These can be used in indicating variants or extensions of emoji characters.
///
/// Returns `true` for characters in the inclusive range U+E0020..=U+E007F.
#[inline]
pub fn is_tag_character(c: char) -> bool {
    matches!(c, '\u{E0020}'..='\u{E007F}')
}
123 | } |
124 | |
125 | #[cfg (feature = "general-category" )] |
126 | /// Query the general category property of a character. |
127 | pub mod general_category { |
128 | pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup}; |
129 | |
130 | /// Query the general category property of a character. |
131 | /// |
132 | /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info. |
133 | pub trait UnicodeGeneralCategory: Sized { |
134 | /// Queries the most general classification of a character. |
135 | fn general_category(self) -> GeneralCategory; |
136 | |
137 | /// Queries the grouping of the most general classification of a character. |
138 | fn general_category_group(self) -> GeneralCategoryGroup { |
139 | crate::tables::general_category::general_category_group(self.general_category()) |
140 | } |
141 | |
142 | /// Queries whether the most general classification of a character belongs to the `LetterCased` group |
143 | /// |
144 | /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase` |
145 | /// categories, and is a subset of the `Letter` group. |
146 | fn is_letter_cased(self) -> bool { |
147 | crate::tables::general_category::general_category_is_letter_cased( |
148 | self.general_category(), |
149 | ) |
150 | } |
151 | } |
152 | |
153 | impl UnicodeGeneralCategory for char { |
154 | fn general_category(self) -> GeneralCategory { |
155 | crate::tables::general_category::general_category_of_char(self) |
156 | } |
157 | } |
158 | } |
159 | |
160 | pub use tables::UNICODE_VERSION; |
161 | |
162 | #[cfg (feature = "emoji" )] |
163 | #[doc (inline)] |
164 | pub use emoji::UnicodeEmoji; |
165 | |
166 | #[cfg (feature = "emoji" )] |
167 | #[doc (inline)] |
168 | pub use emoji::EmojiStatus; |
169 | |
170 | #[cfg (feature = "general-category" )] |
171 | #[doc (inline)] |
172 | pub use general_category::GeneralCategory; |
173 | |
174 | #[cfg (feature = "general-category" )] |
175 | #[doc (inline)] |
176 | pub use general_category::GeneralCategoryGroup; |
177 | |
178 | #[cfg (feature = "general-category" )] |
179 | #[doc (inline)] |
180 | pub use general_category::UnicodeGeneralCategory; |
181 | |