1use std::path::Path;
2
3use crate::{
4 common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint},
5 error::Error,
6};
7
8/// A single row in the `NameAliases.txt` file.
9///
10/// Note that there are multiple rows for some codepoint. Each row provides a
11/// new alias.
12#[derive(Clone, Debug, Default, Eq, PartialEq)]
13pub struct NameAlias {
14 /// The codepoint corresponding to this row.
15 pub codepoint: Codepoint,
16 /// The alias.
17 pub alias: String,
18 /// The label of this alias.
19 pub label: NameAliasLabel,
20}
21
22impl UcdFile for NameAlias {
23 fn relative_file_path() -> &'static Path {
24 Path::new("NameAliases.txt")
25 }
26}
27
28impl UcdFileByCodepoint for NameAlias {
29 fn codepoints(&self) -> CodepointIter {
30 self.codepoint.into_iter()
31 }
32}
33
34impl std::str::FromStr for NameAlias {
35 type Err = Error;
36
37 fn from_str(line: &str) -> Result<NameAlias, Error> {
38 let re_parts = regex!(
39 r"(?x)
40 ^
41 (?P<codepoint>[A-Z0-9]+);
42 \s*
43 (?P<alias>[^;]+);
44 \s*
45 (?P<label>\S+)
46 ",
47 );
48
49 let caps = match re_parts.captures(line.trim()) {
50 Some(caps) => caps,
51 None => return err!("invalid NameAliases line"),
52 };
53 Ok(NameAlias {
54 codepoint: caps["codepoint"].parse()?,
55 alias: caps.name("alias").unwrap().as_str().to_string(),
56 label: caps["label"].parse()?,
57 })
58 }
59}
60
/// The kind of a name alias, as given by its label.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NameAliasLabel {
    /// A correction for a serious problem in a character name.
    Correction,
    /// An ISO 6429 name for a C0 or C1 control function, or another
    /// commonly occurring name for a control code.
    Control,
    /// One of a few widely used alternate names for format characters.
    Alternate,
    /// One of several documented labels for C1 control code points which
    /// were never actually approved in any standard.
    Figment,
    /// A commonly occurring abbreviation (or acronym) for a control code,
    /// format character, space or variation selector.
    Abbreviation,
}
78
79impl Default for NameAliasLabel {
80 fn default() -> NameAliasLabel {
81 // This is arbitrary, but the Default impl is convenient.
82 NameAliasLabel::Correction
83 }
84}
85
86impl std::str::FromStr for NameAliasLabel {
87 type Err = Error;
88
89 fn from_str(s: &str) -> Result<NameAliasLabel, Error> {
90 match s {
91 "correction" => Ok(NameAliasLabel::Correction),
92 "control" => Ok(NameAliasLabel::Control),
93 "alternate" => Ok(NameAliasLabel::Alternate),
94 "figment" => Ok(NameAliasLabel::Figment),
95 "abbreviation" => Ok(NameAliasLabel::Abbreviation),
96 unknown: &str => err!("unknown name alias label: '{}'", unknown),
97 }
98 }
99}
100
#[cfg(test)]
mod tests {
    use super::{NameAlias, NameAliasLabel};

    /// Parses one `NameAliases.txt` line, panicking if it is rejected.
    fn row(line: &str) -> NameAlias {
        line.parse().unwrap()
    }

    #[test]
    fn parse1() {
        let parsed = row("0000;NULL;control\n");
        assert_eq!(parsed.codepoint, 0x0);
        assert_eq!(parsed.alias, "NULL");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse2() {
        let parsed = row("000B;VERTICAL TABULATION;control\n");
        assert_eq!(parsed.codepoint, 0xB);
        assert_eq!(parsed.alias, "VERTICAL TABULATION");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse3() {
        let parsed = row("0081;HIGH OCTET PRESET;figment\n");
        assert_eq!(parsed.codepoint, 0x81);
        assert_eq!(parsed.alias, "HIGH OCTET PRESET");
        assert_eq!(parsed.label, NameAliasLabel::Figment);
    }

    #[test]
    fn parse4() {
        let parsed = row("E01EF;VS256;abbreviation\n");
        assert_eq!(parsed.codepoint, 0xE01EF);
        assert_eq!(parsed.alias, "VS256");
        assert_eq!(parsed.label, NameAliasLabel::Abbreviation);
    }
}
141