1use std::path::Path;
2use std::str::FromStr;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
8use crate::error::Error;
9
10/// A single row in the `NameAliases.txt` file.
11///
12/// Note that there are multiple rows for some codepoint. Each row provides a
13/// new alias.
14#[derive(Clone, Debug, Default, Eq, PartialEq)]
15pub struct NameAlias {
16 /// The codepoint corresponding to this row.
17 pub codepoint: Codepoint,
18 /// The alias.
19 pub alias: String,
20 /// The label of this alias.
21 pub label: NameAliasLabel,
22}
23
24impl UcdFile for NameAlias {
25 fn relative_file_path() -> &'static Path {
26 Path::new("NameAliases.txt")
27 }
28}
29
30impl UcdFileByCodepoint for NameAlias {
31 fn codepoints(&self) -> CodepointIter {
32 self.codepoint.into_iter()
33 }
34}
35
36impl FromStr for NameAlias {
37 type Err = Error;
38
39 fn from_str(line: &str) -> Result<NameAlias, Error> {
40 static PARTS: Lazy<Regex> = Lazy::new(|| {
41 Regex::new(
42 r"(?x)
43 ^
44 (?P<codepoint>[A-Z0-9]+);
45 \s*
46 (?P<alias>[^;]+);
47 \s*
48 (?P<label>\S+)
49 ",
50 )
51 .unwrap()
52 });
53
54 let caps = match PARTS.captures(line.trim()) {
55 Some(caps) => caps,
56 None => return err!("invalid NameAliases line"),
57 };
58 Ok(NameAlias {
59 codepoint: caps["codepoint"].parse()?,
60 alias: caps.name("alias").unwrap().as_str().to_string(),
61 label: caps["label"].parse()?,
62 })
63 }
64}
65
/// The category attached to a name alias.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NameAliasLabel {
    /// A correction for a serious problem in a character name.
    Correction,
    /// An ISO 6429 name for a C0 or C1 control function, or another
    /// commonly occurring name for a control code.
    Control,
    /// One of a few widely used alternate names for format characters.
    Alternate,
    /// A documented label for a C1 control code point that was never
    /// actually approved in any standard.
    Figment,
    /// A commonly occurring abbreviation (or acronym) for a control code,
    /// format character, space or variation selector.
    Abbreviation,
}
83
84impl Default for NameAliasLabel {
85 fn default() -> NameAliasLabel {
86 // This is arbitrary, but the Default impl is convenient.
87 NameAliasLabel::Correction
88 }
89}
90
91impl FromStr for NameAliasLabel {
92 type Err = Error;
93
94 fn from_str(s: &str) -> Result<NameAliasLabel, Error> {
95 match s {
96 "correction" => Ok(NameAliasLabel::Correction),
97 "control" => Ok(NameAliasLabel::Control),
98 "alternate" => Ok(NameAliasLabel::Alternate),
99 "figment" => Ok(NameAliasLabel::Figment),
100 "abbreviation" => Ok(NameAliasLabel::Abbreviation),
101 unknown: &str => err!("unknown name alias label: '{}'", unknown),
102 }
103 }
104}
105
#[cfg(test)]
mod tests {
    use super::{NameAlias, NameAliasLabel};

    /// Parses `line` as a `NameAlias` row, panicking if parsing fails.
    fn row(line: &str) -> NameAlias {
        line.parse().unwrap()
    }

    #[test]
    fn parse1() {
        let parsed = row("0000;NULL;control\n");
        assert_eq!(parsed.codepoint, 0x0);
        assert_eq!(parsed.alias, "NULL");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse2() {
        let parsed = row("000B;VERTICAL TABULATION;control\n");
        assert_eq!(parsed.codepoint, 0xB);
        assert_eq!(parsed.alias, "VERTICAL TABULATION");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse3() {
        let parsed = row("0081;HIGH OCTET PRESET;figment\n");
        assert_eq!(parsed.codepoint, 0x81);
        assert_eq!(parsed.alias, "HIGH OCTET PRESET");
        assert_eq!(parsed.label, NameAliasLabel::Figment);
    }

    #[test]
    fn parse4() {
        let parsed = row("E01EF;VS256;abbreviation\n");
        assert_eq!(parsed.codepoint, 0xE01EF);
        assert_eq!(parsed.alias, "VS256");
        assert_eq!(parsed.label, NameAliasLabel::Abbreviation);
    }
}
146