1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Query character Unicode properties according to
12//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
13//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
14//! rules.
15//!
16//! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`.
17//!
18//! Future properties can be added as requested.
19//!
20//! ```rust
21//! use unicode_properties::UnicodeEmoji;
22//! use unicode_properties::UnicodeGeneralCategory;
23//!
24//! fn main() {
25//! let ch = '🦀'; // U+1F980 CRAB
26//! let is_emoji = ch.is_emoji_char();
27//! let group = ch.general_category_group();
28//! println!("{}({:?})", ch, group);
29//! println!("The above char {} for use as emoji char.",
30//! if is_emoji { "is recommended" } else { "is not recommended" });
31//! }
32//! ```
33//!
34//! # Features
35//!
36//! ## `general-category`
37//!
38//! Provides the most general classification of a character,
39//! based on its primary characteristic.
40//!
41//! ## `emoji`
42//!
43//! Provides the emoji character properties of a character.
44//!
45#![deny(missing_docs)]
46
47#[rustfmt::skip]
48mod tables;
49
50#[cfg(feature = "emoji")]
51/// Query the emoji character properties of a character.
52pub mod emoji {
53 pub use crate::tables::emoji::EmojiStatus;
54
55 /// Query the emoji character properties of a character.
56 pub trait UnicodeEmoji: Sized {
57 /// Returns the emoji character properties in a status enum.
58 fn emoji_status(self) -> EmojiStatus;
59
60 /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
61 fn is_emoji_char(self) -> bool {
62 crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status())
63 }
64
65 /// Checks whether this character are used in emoji sequences where they're not
66 /// intended for independent, direct input, i.e. `Emoji_Component=YES`.
67 fn is_emoji_component(self) -> bool {
68 crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status())
69 }
70
71 /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES`
72 fn is_emoji_char_or_emoji_component(self) -> bool {
73 crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(
74 self.emoji_status(),
75 )
76 }
77 }
78
79 impl UnicodeEmoji for char {
80 fn emoji_status(self) -> EmojiStatus {
81 crate::tables::emoji::emoji_status(self)
82 }
83 }
84
85 #[inline]
86 /// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
87 ///
88 /// It can be used between the elements of a sequence of characters to indicate that
89 /// a single glyph should be presented if available.
90 pub fn is_zwj(c: char) -> bool {
91 c == '\u{200D}'
92 }
93
94 #[inline]
95 /// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
96 /// request an emoji presentation for an emoji character.
97 pub fn is_emoji_presentation_selector(c: char) -> bool {
98 c == '\u{FE0F}'
99 }
100
101 #[inline]
102 /// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
103 /// request a text presentation for an emoji character.
104 pub fn is_text_presentation_selector(c: char) -> bool {
105 c == '\u{FE0E}'
106 }
107
108 #[inline]
109 /// Checks whether this character is one of the Regional Indicator characters.
110 ///
111 /// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
112 pub fn is_regional_indicator(c: char) -> bool {
113 matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
114 }
115
116 #[inline]
117 /// Checks whether this character is one of the Tag Characters.
118 ///
119 /// These can be used in indicating variants or extensions of emoji characters.
120 pub fn is_tag_character(c: char) -> bool {
121 matches!(c, '\u{E0020}'..='\u{E007F}')
122 }
123}
124
125#[cfg(feature = "general-category")]
126/// Query the general category property of a character.
127pub mod general_category {
128 pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};
129
130 /// Query the general category property of a character.
131 ///
132 /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
133 pub trait UnicodeGeneralCategory: Sized {
134 /// Queries the most general classification of a character.
135 fn general_category(self) -> GeneralCategory;
136
137 /// Queries the grouping of the most general classification of a character.
138 fn general_category_group(self) -> GeneralCategoryGroup {
139 crate::tables::general_category::general_category_group(self.general_category())
140 }
141
142 /// Queries whether the most general classification of a character belongs to the `LetterCased` group
143 ///
144 /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
145 /// categories, and is a subset of the `Letter` group.
146 fn is_letter_cased(self) -> bool {
147 crate::tables::general_category::general_category_is_letter_cased(
148 self.general_category(),
149 )
150 }
151 }
152
153 impl UnicodeGeneralCategory for char {
154 fn general_category(self) -> GeneralCategory {
155 crate::tables::general_category::general_category_of_char(self)
156 }
157 }
158}
159
160pub use tables::UNICODE_VERSION;
161
162#[cfg(feature = "emoji")]
163#[doc(inline)]
164pub use emoji::UnicodeEmoji;
165
166#[cfg(feature = "emoji")]
167#[doc(inline)]
168pub use emoji::EmojiStatus;
169
170#[cfg(feature = "general-category")]
171#[doc(inline)]
172pub use general_category::GeneralCategory;
173
174#[cfg(feature = "general-category")]
175#[doc(inline)]
176pub use general_category::GeneralCategoryGroup;
177
178#[cfg(feature = "general-category")]
179#[doc(inline)]
180pub use general_category::UnicodeGeneralCategory;
181