1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! Query character Unicode properties according to |
12 | //! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/) |
13 | //! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/) |
14 | //! rules. |
15 | //! |
16 | //! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`. |
17 | //! |
18 | //! Future properties can be added as requested. |
19 | //! |
20 | //! ```rust |
21 | //! use unicode_properties::UnicodeEmoji; |
22 | //! use unicode_properties::UnicodeGeneralCategory; |
23 | //! |
//! fn main() {
//!     let ch = '🦀'; // U+1F980 CRAB
//!     let is_emoji = ch.is_emoji_char();
//!     let group = ch.general_category_group();
//!     println!("{}({:?})", ch, group);
//!     println!("The above char {} for use as emoji char.",
//!              if is_emoji { "is recommended" } else { "is not recommended" });
//! }
32 | //! ``` |
33 | //! |
34 | //! # Features |
35 | //! |
36 | //! ## `general-category` |
37 | //! |
38 | //! Provides the most general classification of a character, |
39 | //! based on its primary characteristic. |
40 | //! |
41 | //! ## `emoji` |
42 | //! |
43 | //! Provides the emoji character properties of a character. |
44 | //! |
45 | #![no_std ] |
46 | #![deny (missing_docs)] |
47 | |
48 | #[rustfmt::skip] |
49 | mod tables; |
50 | |
51 | #[cfg (feature = "emoji" )] |
52 | /// Query the emoji character properties of a character. |
53 | pub mod emoji { |
54 | pub use crate::tables::emoji::EmojiStatus; |
55 | |
56 | /// Query the emoji character properties of a character. |
57 | pub trait UnicodeEmoji: Sized { |
58 | /// Returns the emoji character properties in a status enum. |
59 | fn emoji_status(self) -> EmojiStatus; |
60 | |
61 | /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`. |
62 | fn is_emoji_char(self) -> bool { |
63 | crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status()) |
64 | } |
65 | |
66 | /// Checks whether this character are used in emoji sequences where they're not |
67 | /// intended for independent, direct input, i.e. `Emoji_Component=YES`. |
68 | fn is_emoji_component(self) -> bool { |
69 | crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status()) |
70 | } |
71 | |
72 | /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES` |
73 | fn is_emoji_char_or_emoji_component(self) -> bool { |
74 | crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component( |
75 | self.emoji_status(), |
76 | ) |
77 | } |
78 | } |
79 | |
80 | impl UnicodeEmoji for char { |
81 | fn emoji_status(self) -> EmojiStatus { |
82 | crate::tables::emoji::emoji_status(self) |
83 | } |
84 | } |
85 | |
/// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
///
/// It can be used between the elements of a sequence of characters to indicate that
/// a single glyph should be presented if available.
///
/// Returns `true` only for U+200D; every other character yields `false`.
#[inline]
pub fn is_zwj(c: char) -> bool {
    // A `char` literal must hold exactly one code point: just the escape, no padding.
    c == '\u{200D}'
}
94 | |
/// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
/// request an emoji presentation for an emoji character.
///
/// Returns `true` only for U+FE0F; every other character yields `false`.
#[inline]
pub fn is_emoji_presentation_selector(c: char) -> bool {
    // A `char` literal must hold exactly one code point: just the escape, no padding.
    c == '\u{FE0F}'
}
101 | |
/// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
/// request a text presentation for an emoji character.
///
/// Returns `true` only for U+FE0E; every other character yields `false`.
#[inline]
pub fn is_text_presentation_selector(c: char) -> bool {
    // A `char` literal must hold exactly one code point: just the escape, no padding.
    c == '\u{FE0E}'
}
108 | |
/// Checks whether this character is one of the Regional Indicator characters.
///
/// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
///
/// Returns `true` for every character in the inclusive range U+1F1E6..=U+1F1FF
/// and `false` otherwise.
#[inline]
pub fn is_regional_indicator(c: char) -> bool {
    // Both range endpoints are single-code-point `char` literals.
    matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
}
116 | |
/// Checks whether this character is one of the Tag Characters.
///
/// These can be used in indicating variants or extensions of emoji characters.
///
/// Returns `true` for every character in the inclusive range U+E0020..=U+E007F
/// and `false` otherwise.
#[inline]
pub fn is_tag_character(c: char) -> bool {
    // Both range endpoints are single-code-point `char` literals.
    matches!(c, '\u{E0020}'..='\u{E007F}')
}
124 | } |
125 | |
126 | #[cfg (feature = "general-category" )] |
127 | /// Query the general category property of a character. |
128 | pub mod general_category { |
129 | pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup}; |
130 | |
131 | /// Query the general category property of a character. |
132 | /// |
133 | /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info. |
134 | pub trait UnicodeGeneralCategory: Sized { |
135 | /// Queries the most general classification of a character. |
136 | fn general_category(self) -> GeneralCategory; |
137 | |
138 | /// Queries the grouping of the most general classification of a character. |
139 | fn general_category_group(self) -> GeneralCategoryGroup { |
140 | crate::tables::general_category::general_category_group(self.general_category()) |
141 | } |
142 | |
143 | /// Queries whether the most general classification of a character belongs to the `LetterCased` group |
144 | /// |
145 | /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase` |
146 | /// categories, and is a subset of the `Letter` group. |
147 | fn is_letter_cased(self) -> bool { |
148 | crate::tables::general_category::general_category_is_letter_cased( |
149 | self.general_category(), |
150 | ) |
151 | } |
152 | } |
153 | |
154 | impl UnicodeGeneralCategory for char { |
155 | fn general_category(self) -> GeneralCategory { |
156 | crate::tables::general_category::general_category_of_char(self) |
157 | } |
158 | } |
159 | } |
160 | |
161 | pub use tables::UNICODE_VERSION; |
162 | |
163 | #[cfg (feature = "emoji" )] |
164 | #[doc (inline)] |
165 | pub use emoji::UnicodeEmoji; |
166 | |
167 | #[cfg (feature = "emoji" )] |
168 | #[doc (inline)] |
169 | pub use emoji::EmojiStatus; |
170 | |
171 | #[cfg (feature = "general-category" )] |
172 | #[doc (inline)] |
173 | pub use general_category::GeneralCategory; |
174 | |
175 | #[cfg (feature = "general-category" )] |
176 | #[doc (inline)] |
177 | pub use general_category::GeneralCategoryGroup; |
178 | |
179 | #[cfg (feature = "general-category" )] |
180 | #[doc (inline)] |
181 | pub use general_category::UnicodeGeneralCategory; |
182 | |