| 1 | // Copyright 2012-2024 The Rust Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution and at |
| 3 | // https://www.rust-lang.org/policies/licenses. |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 8 | // option. This file may not be copied, modified, or distributed |
| 9 | // except according to those terms. |
| 10 | |
| 11 | //! Determine if a `char` is a valid identifier for a parser and/or lexer according to |
| 12 | //! [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules. |
| 13 | //! |
| 14 | //! ```rust |
| 15 | //! use unicode_xid::UnicodeXID; |
| 16 | //! |
| 17 | //! fn main() { |
| 18 | //! assert_eq!(UnicodeXID::is_xid_start('a' ), true); // 'a' is a valid start of an identifier |
| 19 | //! assert_eq!(UnicodeXID::is_xid_start('△' ), false); // '△' is NOT a valid start of an identifier |
| 20 | //! } |
| 21 | //! ``` |
| 22 | //! |
| 23 | //! # features |
| 24 | //! |
| 25 | //! unicode-xid supports a `no_std` feature. This eliminates dependence |
| 26 | //! on std, and instead uses equivalent functions from core. |
| 27 | //! |
| 28 | |
| 29 | #![forbid (unsafe_code)] |
| 30 | #![deny (missing_docs)] |
| 31 | #![doc ( |
| 32 | html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png" , |
| 33 | html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png" |
| 34 | )] |
| 35 | #![no_std ] |
| 36 | #![cfg_attr (feature = "bench" , feature(test, unicode_internals))] |
| 37 | |
| 38 | #[cfg (test)] |
| 39 | #[macro_use ] |
| 40 | extern crate std; |
| 41 | |
| 42 | #[cfg (feature = "bench" )] |
| 43 | extern crate test; |
| 44 | |
| 45 | use tables::derived_property; |
| 46 | pub use tables::UNICODE_VERSION; |
| 47 | |
| 48 | mod tables; |
| 49 | |
| 50 | #[cfg (test)] |
| 51 | mod tests; |
| 52 | |
/// Classification of characters as identifier characters, following the
/// 'XID_Start' and 'XID_Continue' Unicode properties.
pub trait UnicodeXID {
    /// Reports whether the value has the 'XID_Start' Unicode property.
    ///
    /// 'XID_Start' is a Unicode Derived Property defined in
    /// [UAX #31](https://www.unicode.org/reports/tr31/#NFKC_Modifications).
    /// It is mostly similar to 'ID_Start', but modified for closure under NFKx.
    fn is_xid_start(self) -> bool;

    /// Reports whether the value has the 'XID_Continue' Unicode property.
    ///
    /// 'XID_Continue' is a Unicode Derived Property defined in
    /// [UAX #31](https://www.unicode.org/reports/tr31/#NFKC_Modifications).
    /// It is mostly similar to 'ID_Continue', but modified for closure under NFKx.
    fn is_xid_continue(self) -> bool;
}
| 71 | |
| 72 | impl UnicodeXID for char { |
| 73 | #[inline ] |
| 74 | fn is_xid_start(self) -> bool { |
| 75 | // Fast-path for ascii idents |
| 76 | ('a' <= self && self <= 'z' ) |
| 77 | || ('A' <= self && self <= 'Z' ) |
| 78 | || (self > ' \x7f' && derived_property::XID_Start(self)) |
| 79 | } |
| 80 | |
| 81 | #[inline ] |
| 82 | fn is_xid_continue(self) -> bool { |
| 83 | // Fast-path for ascii idents |
| 84 | ('a' <= self && self <= 'z' ) |
| 85 | || ('A' <= self && self <= 'Z' ) |
| 86 | || ('0' <= self && self <= '9' ) |
| 87 | || self == '_' |
| 88 | || (self > ' \x7f' && derived_property::XID_Continue(self)) |
| 89 | } |
| 90 | } |
| 91 | |