1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
//! Determine whether a `char` is valid at the start of, or as a continuation of, an
//! identifier for a parser and/or lexer, according to
//! [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules.
13 | //! |
14 | //! ```rust |
15 | //! extern crate unicode_xid; |
16 | //! |
17 | //! use unicode_xid::UnicodeXID; |
18 | //! |
19 | //! fn main() { |
20 | //! let ch = 'a' ; |
21 | //! println!("Is {} a valid start of an identifier? {}" , ch, UnicodeXID::is_xid_start(ch)); |
22 | //! } |
23 | //! ``` |
24 | //! |
//! # Features
//!
//! unicode-xid supports a `no_std` feature and is itself declared `#![no_std]`,
//! so it does not depend on `std` (which is linked only for the crate's own
//! tests) and instead uses equivalent functions from `core`.
29 | //! |
30 | |
31 | #![forbid (unsafe_code)] |
32 | #![deny (missing_docs)] |
33 | #![doc ( |
34 | html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png" , |
35 | html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png" |
36 | )] |
37 | #![no_std ] |
38 | #![cfg_attr (feature = "bench" , feature(test, unicode_internals))] |
39 | |
40 | #[cfg (test)] |
41 | #[macro_use ] |
42 | extern crate std; |
43 | |
44 | #[cfg (feature = "bench" )] |
45 | extern crate test; |
46 | |
47 | use tables::derived_property; |
48 | pub use tables::UNICODE_VERSION; |
49 | |
50 | mod tables; |
51 | |
52 | #[cfg (test)] |
53 | mod tests; |
54 | |
/// Classification of characters as valid identifier characters, based on the
/// Unicode `XID_Start` and `XID_Continue` derived properties.
pub trait UnicodeXID {
    /// Reports whether `self` has the `XID_Start` Unicode property, i.e.
    /// whether it may begin an identifier.
    ///
    /// `XID_Start` is a Unicode Derived Property defined in
    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications). It is
    /// mostly the same as `ID_Start`, adjusted so that it is closed under NFKx.
    fn is_xid_start(self) -> bool;

    /// Reports whether `self` has the `XID_Continue` Unicode property, i.e.
    /// whether it may appear after the first character of an identifier.
    ///
    /// `XID_Continue` is a Unicode Derived Property defined in
    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications). It is
    /// mostly the same as `ID_Continue`, adjusted so that it is closed under
    /// NFKx.
    fn is_xid_continue(self) -> bool;
}
73 | |
74 | impl UnicodeXID for char { |
75 | #[inline ] |
76 | fn is_xid_start(self) -> bool { |
77 | // Fast-path for ascii idents |
78 | ('a' <= self && self <= 'z' ) |
79 | || ('A' <= self && self <= 'Z' ) |
80 | || (self > ' \x7f' && derived_property::XID_Start(self)) |
81 | } |
82 | |
83 | #[inline ] |
84 | fn is_xid_continue(self) -> bool { |
85 | // Fast-path for ascii idents |
86 | ('a' <= self && self <= 'z' ) |
87 | || ('A' <= self && self <= 'Z' ) |
88 | || ('0' <= self && self <= '9' ) |
89 | || self == '_' |
90 | || (self > ' \x7f' && derived_property::XID_Continue(self)) |
91 | } |
92 | } |
93 | |