1 | use std::path::Path; |
2 | |
3 | use crate::{ |
4 | common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}, |
5 | error::Error, |
6 | }; |
7 | |
8 | /// A single row in the `NameAliases.txt` file. |
9 | /// |
10 | /// Note that there are multiple rows for some codepoint. Each row provides a |
11 | /// new alias. |
12 | #[derive (Clone, Debug, Default, Eq, PartialEq)] |
13 | pub struct NameAlias { |
14 | /// The codepoint corresponding to this row. |
15 | pub codepoint: Codepoint, |
16 | /// The alias. |
17 | pub alias: String, |
18 | /// The label of this alias. |
19 | pub label: NameAliasLabel, |
20 | } |
21 | |
22 | impl UcdFile for NameAlias { |
23 | fn relative_file_path() -> &'static Path { |
24 | Path::new("NameAliases.txt" ) |
25 | } |
26 | } |
27 | |
impl UcdFileByCodepoint for NameAlias {
    /// Returns the codepoints of this row as a `CodepointIter`, built by
    /// converting the row's `codepoint` field into an iterator.
    ///
    /// NOTE(review): presumably this yields exactly that one codepoint;
    /// confirm against `Codepoint`'s `into_iter` implementation.
    fn codepoints(&self) -> CodepointIter {
        self.codepoint.into_iter()
    }
}
33 | |
34 | impl std::str::FromStr for NameAlias { |
35 | type Err = Error; |
36 | |
37 | fn from_str(line: &str) -> Result<NameAlias, Error> { |
38 | let re_parts = regex!( |
39 | r"(?x) |
40 | ^ |
41 | (?P<codepoint>[A-Z0-9]+); |
42 | \s* |
43 | (?P<alias>[^;]+); |
44 | \s* |
45 | (?P<label>\S+) |
46 | " , |
47 | ); |
48 | |
49 | let caps = match re_parts.captures(line.trim()) { |
50 | Some(caps) => caps, |
51 | None => return err!("invalid NameAliases line" ), |
52 | }; |
53 | Ok(NameAlias { |
54 | codepoint: caps["codepoint" ].parse()?, |
55 | alias: caps.name("alias" ).unwrap().as_str().to_string(), |
56 | label: caps["label" ].parse()?, |
57 | }) |
58 | } |
59 | } |
60 | |
/// The kind of a name alias, i.e. the reason the alias exists.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameAliasLabel {
    /// The alias corrects a serious problem in a character name.
    Correction,
    /// The alias is an ISO 6429 name for a C0 or C1 control function, or
    /// another commonly occurring name for a control code.
    Control,
    /// The alias is one of a few widely used alternate names for format
    /// characters.
    Alternate,
    /// The alias is one of several documented labels for C1 control code
    /// points which were never actually approved in any standard.
    Figment,
    /// The alias is a commonly occurring abbreviation (or acronym) for a
    /// control code, format character, space or variation selector.
    Abbreviation,
}
78 | |
79 | impl Default for NameAliasLabel { |
80 | fn default() -> NameAliasLabel { |
81 | // This is arbitrary, but the Default impl is convenient. |
82 | NameAliasLabel::Correction |
83 | } |
84 | } |
85 | |
86 | impl std::str::FromStr for NameAliasLabel { |
87 | type Err = Error; |
88 | |
89 | fn from_str(s: &str) -> Result<NameAliasLabel, Error> { |
90 | match s { |
91 | "correction" => Ok(NameAliasLabel::Correction), |
92 | "control" => Ok(NameAliasLabel::Control), |
93 | "alternate" => Ok(NameAliasLabel::Alternate), |
94 | "figment" => Ok(NameAliasLabel::Figment), |
95 | "abbreviation" => Ok(NameAliasLabel::Abbreviation), |
96 | unknown: &str => err!("unknown name alias label: ' {}'" , unknown), |
97 | } |
98 | } |
99 | } |
100 | |
#[cfg(test)]
mod tests {
    use super::{NameAlias, NameAliasLabel};

    /// A control alias on the lowest codepoint.
    #[test]
    fn parse1() {
        let NameAlias { codepoint, alias, label } =
            "0000;NULL;control \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0x0);
        assert_eq!(alias, "NULL");
        assert_eq!(label, NameAliasLabel::Control);
    }

    /// An alias that contains a space.
    #[test]
    fn parse2() {
        let NameAlias { codepoint, alias, label } =
            "000B;VERTICAL TABULATION;control \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0xB);
        assert_eq!(alias, "VERTICAL TABULATION");
        assert_eq!(label, NameAliasLabel::Control);
    }

    /// A row carrying the `figment` label.
    #[test]
    fn parse3() {
        let NameAlias { codepoint, alias, label } =
            "0081;HIGH OCTET PRESET;figment \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0x81);
        assert_eq!(alias, "HIGH OCTET PRESET");
        assert_eq!(label, NameAliasLabel::Figment);
    }

    /// A five-digit codepoint with the `abbreviation` label.
    #[test]
    fn parse4() {
        let NameAlias { codepoint, alias, label } =
            "E01EF;VS256;abbreviation \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0xE01EF);
        assert_eq!(alias, "VS256");
        assert_eq!(label, NameAliasLabel::Abbreviation);
    }
}
141 | |