1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Query character Unicode properties according to
12//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
13//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
14//! rules.
15//!
16//! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`.
17//!
18//! Future properties can be added as requested.
19//!
20//! ```rust
21//! use unicode_properties::UnicodeEmoji;
22//! use unicode_properties::UnicodeGeneralCategory;
23//!
//! fn main() {
//!     let ch = '🦀'; // U+1F980 CRAB
//!     let is_emoji = ch.is_emoji_char();
//!     let group = ch.general_category_group();
//!     println!("{}({:?})", ch, group);
//!     println!("The above char {} for use as emoji char.",
//!              if is_emoji { "is recommended" } else { "is not recommended" });
//! }
32//! ```
33//!
34//! # Features
35//!
36//! ## `general-category`
37//!
38//! Provides the most general classification of a character,
39//! based on its primary characteristic.
40//!
41//! ## `emoji`
42//!
43//! Provides the emoji character properties of a character.
44//!
45#![no_std]
46#![deny(missing_docs)]
47
48#[rustfmt::skip]
49mod tables;
50
51#[cfg(feature = "emoji")]
52/// Query the emoji character properties of a character.
53pub mod emoji {
54 pub use crate::tables::emoji::EmojiStatus;
55
56 /// Query the emoji character properties of a character.
57 pub trait UnicodeEmoji: Sized {
58 /// Returns the emoji character properties in a status enum.
59 fn emoji_status(self) -> EmojiStatus;
60
61 /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
62 fn is_emoji_char(self) -> bool {
63 crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status())
64 }
65
66 /// Checks whether this character are used in emoji sequences where they're not
67 /// intended for independent, direct input, i.e. `Emoji_Component=YES`.
68 fn is_emoji_component(self) -> bool {
69 crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status())
70 }
71
72 /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES`
73 fn is_emoji_char_or_emoji_component(self) -> bool {
74 crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(
75 self.emoji_status(),
76 )
77 }
78 }
79
80 impl UnicodeEmoji for char {
81 fn emoji_status(self) -> EmojiStatus {
82 crate::tables::emoji::emoji_status(self)
83 }
84 }
85
/// Returns `true` if `c` is U+200D ZERO WIDTH JOINER (ZWJ).
///
/// A ZWJ may be placed between the elements of a character sequence to
/// indicate that a single glyph should be presented if one is available.
#[inline]
pub fn is_zwj(c: char) -> bool {
    matches!(c, '\u{200D}')
}
94
/// Returns `true` if `c` is U+FE0F VARIATION SELECTOR-16 (VS16), the character
/// used to request an emoji presentation for an emoji character.
#[inline]
pub fn is_emoji_presentation_selector(c: char) -> bool {
    // Compare by scalar value; U+FE0F is the only accepted code point.
    u32::from(c) == 0xFE0F
}
101
/// Returns `true` if `c` is U+FE0E VARIATION SELECTOR-15 (VS15), the character
/// used to request a text presentation for an emoji character.
#[inline]
pub fn is_text_presentation_selector(c: char) -> bool {
    matches!(c, '\u{FE0E}')
}
108
/// Returns `true` if `c` is one of the Regional Indicator characters
/// (U+1F1E6 through U+1F1FF, inclusive).
///
/// A pair of REGIONAL INDICATOR symbols is referred to as an
/// emoji_flag_sequence.
#[inline]
pub fn is_regional_indicator(c: char) -> bool {
    ('\u{1F1E6}'..='\u{1F1FF}').contains(&c)
}
116
/// Returns `true` if `c` is one of the Tag Characters
/// (U+E0020 through U+E007F, inclusive).
///
/// These can be used in indicating variants or extensions of emoji characters.
#[inline]
pub fn is_tag_character(c: char) -> bool {
    let code_point = u32::from(c);
    (0xE0020..=0xE007F).contains(&code_point)
}
124}
125
126#[cfg(feature = "general-category")]
127/// Query the general category property of a character.
128pub mod general_category {
129 pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};
130
131 /// Query the general category property of a character.
132 ///
133 /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
134 pub trait UnicodeGeneralCategory: Sized {
135 /// Queries the most general classification of a character.
136 fn general_category(self) -> GeneralCategory;
137
138 /// Queries the grouping of the most general classification of a character.
139 fn general_category_group(self) -> GeneralCategoryGroup {
140 crate::tables::general_category::general_category_group(self.general_category())
141 }
142
143 /// Queries whether the most general classification of a character belongs to the `LetterCased` group
144 ///
145 /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
146 /// categories, and is a subset of the `Letter` group.
147 fn is_letter_cased(self) -> bool {
148 crate::tables::general_category::general_category_is_letter_cased(
149 self.general_category(),
150 )
151 }
152 }
153
154 impl UnicodeGeneralCategory for char {
155 fn general_category(self) -> GeneralCategory {
156 crate::tables::general_category::general_category_of_char(self)
157 }
158 }
159}
160
161pub use tables::UNICODE_VERSION;
162
163#[cfg(feature = "emoji")]
164#[doc(inline)]
165pub use emoji::UnicodeEmoji;
166
167#[cfg(feature = "emoji")]
168#[doc(inline)]
169pub use emoji::EmojiStatus;
170
171#[cfg(feature = "general-category")]
172#[doc(inline)]
173pub use general_category::GeneralCategory;
174
175#[cfg(feature = "general-category")]
176#[doc(inline)]
177pub use general_category::GeneralCategoryGroup;
178
179#[cfg(feature = "general-category")]
180#[doc(inline)]
181pub use general_category::UnicodeGeneralCategory;
182