| 1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution and at |
| 3 | // http://rust-lang.org/COPYRIGHT. |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | // option. This file may not be copied, modified, or distributed |
| 9 | // except according to those terms. |
| 10 | |
| 11 | //! Query character Unicode properties according to |
| 12 | //! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/) |
| 13 | //! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/) |
| 14 | //! rules. |
| 15 | //! |
| 16 | //! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`. |
| 17 | //! |
| 18 | //! Future properties can be added as requested. |
| 19 | //! |
| 20 | //! ```rust |
| 21 | //! use unicode_properties::UnicodeEmoji; |
| 22 | //! use unicode_properties::UnicodeGeneralCategory; |
| 23 | //! |
| 24 | //! let ch = '🦀' ; // U+1F980 CRAB |
| 25 | //! let is_emoji = ch.is_emoji_char(); |
| 26 | //! let group = ch.general_category_group(); |
| 27 | //! println!("{}({:?})" , ch, group); |
| 28 | //! println!("The above char {} for use as emoji char." , |
| 29 | //! if is_emoji { "is recommended" } else { "is not recommended" }); |
| 30 | //! ``` |
| 31 | //! |
| 32 | //! # Features |
| 33 | //! |
| 34 | //! ## `general-category` |
| 35 | //! |
| 36 | //! Provides the most general classification of a character, |
| 37 | //! based on its primary characteristic. |
| 38 | //! |
| 39 | //! ## `emoji` |
| 40 | //! |
| 41 | //! Provides the emoji character properties of a character. |
| 42 | //! |
| 43 | #![no_std ] |
| 44 | #![deny (missing_docs)] |
| 45 | |
| 46 | #[rustfmt::skip] |
| 47 | mod tables; |
| 48 | |
| 49 | #[cfg (feature = "emoji" )] |
| 50 | /// Query the emoji character properties of a character. |
| 51 | pub mod emoji { |
| 52 | pub use crate::tables::emoji::EmojiStatus; |
| 53 | |
| 54 | /// Query the emoji character properties of a character. |
| 55 | pub trait UnicodeEmoji: Sized { |
| 56 | /// Returns the emoji character properties in a status enum. |
| 57 | fn emoji_status(self) -> EmojiStatus; |
| 58 | |
| 59 | /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`. |
| 60 | #[allow (clippy::wrong_self_convention)] |
| 61 | fn is_emoji_char(self) -> bool { |
| 62 | crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status()) |
| 63 | } |
| 64 | |
| 65 | /// Checks whether this character are used in emoji sequences where they're not |
| 66 | /// intended for independent, direct input, i.e. `Emoji_Component=YES`. |
| 67 | #[allow (clippy::wrong_self_convention)] |
| 68 | fn is_emoji_component(self) -> bool { |
| 69 | crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status()) |
| 70 | } |
| 71 | |
| 72 | /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES` |
| 73 | #[allow (clippy::wrong_self_convention)] |
| 74 | fn is_emoji_char_or_emoji_component(self) -> bool { |
| 75 | crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component( |
| 76 | self.emoji_status(), |
| 77 | ) |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | impl UnicodeEmoji for char { |
| 82 | fn emoji_status(self) -> EmojiStatus { |
| 83 | crate::tables::emoji::emoji_status(self) |
| 84 | } |
| 85 | } |
| 86 | |
/// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
///
/// ZWJ can be used between the elements of a sequence of characters to indicate that
/// a single glyph should be presented if available.
///
/// Returns `true` only for U+200D itself.
#[inline]
pub fn is_zwj(c: char) -> bool {
    c == '\u{200D}'
}
| 95 | |
/// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character,
/// used to request an emoji presentation for an emoji character.
///
/// Returns `true` only for U+FE0F itself.
#[inline]
pub fn is_emoji_presentation_selector(c: char) -> bool {
    c == '\u{FE0F}'
}
| 102 | |
/// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character,
/// used to request a text presentation for an emoji character.
///
/// Returns `true` only for U+FE0E itself.
#[inline]
pub fn is_text_presentation_selector(c: char) -> bool {
    c == '\u{FE0E}'
}
| 109 | |
/// Checks whether this character is one of the Regional Indicator characters.
///
/// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
///
/// Returns `true` for every character in the inclusive range U+1F1E6..=U+1F1FF.
#[inline]
pub fn is_regional_indicator(c: char) -> bool {
    matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
}
| 117 | |
/// Checks whether this character is one of the Tag Characters.
///
/// These can be used in indicating variants or extensions of emoji characters.
///
/// Returns `true` for every character in the inclusive range U+E0020..=U+E007F.
#[inline]
pub fn is_tag_character(c: char) -> bool {
    matches!(c, '\u{E0020}'..='\u{E007F}')
}
| 125 | } |
| 126 | |
| 127 | #[cfg (feature = "general-category" )] |
| 128 | /// Query the general category property of a character. |
| 129 | pub mod general_category { |
| 130 | pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup}; |
| 131 | |
| 132 | /// Query the general category property of a character. |
| 133 | /// |
| 134 | /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info. |
| 135 | pub trait UnicodeGeneralCategory: Sized { |
| 136 | /// Queries the most general classification of a character. |
| 137 | fn general_category(self) -> GeneralCategory; |
| 138 | |
| 139 | /// Queries the grouping of the most general classification of a character. |
| 140 | fn general_category_group(self) -> GeneralCategoryGroup { |
| 141 | crate::tables::general_category::general_category_group(self.general_category()) |
| 142 | } |
| 143 | |
| 144 | /// Queries whether the most general classification of a character belongs to the `LetterCased` group |
| 145 | /// |
| 146 | /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase` |
| 147 | /// categories, and is a subset of the `Letter` group. |
| 148 | #[allow (clippy::wrong_self_convention)] |
| 149 | fn is_letter_cased(self) -> bool { |
| 150 | crate::tables::general_category::general_category_is_letter_cased( |
| 151 | self.general_category(), |
| 152 | ) |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | impl UnicodeGeneralCategory for char { |
| 157 | fn general_category(self) -> GeneralCategory { |
| 158 | crate::tables::general_category::general_category_of_char(self) |
| 159 | } |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | pub use tables::UNICODE_VERSION; |
| 164 | |
| 165 | #[cfg (feature = "emoji" )] |
| 166 | #[doc (inline)] |
| 167 | pub use emoji::UnicodeEmoji; |
| 168 | |
| 169 | #[cfg (feature = "emoji" )] |
| 170 | #[doc (inline)] |
| 171 | pub use emoji::EmojiStatus; |
| 172 | |
| 173 | #[cfg (feature = "general-category" )] |
| 174 | #[doc (inline)] |
| 175 | pub use general_category::GeneralCategory; |
| 176 | |
| 177 | #[cfg (feature = "general-category" )] |
| 178 | #[doc (inline)] |
| 179 | pub use general_category::GeneralCategoryGroup; |
| 180 | |
| 181 | #[cfg (feature = "general-category" )] |
| 182 | #[doc (inline)] |
| 183 | pub use general_category::UnicodeGeneralCategory; |
| 184 | |