1use std::path::Path;
2use std::str::FromStr;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use crate::common::UcdFile;
8use crate::error::Error;
9
/// One parsed record of the `PropertyValueAliases.txt` file.
///
/// Each record ties a property to the names by which one of its values may
/// be written: an abbreviation, a long name, and any further aliases.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PropertyValueAlias {
    /// Name of the property this value alias belongs to.
    pub property: String,
    /// Numeric form of the property value, when the record has one. (In
    /// practice this seems to occur only for the
    /// `ccc`/`Canonical_Combining_Class` property.)
    pub numeric: Option<u8>,
    /// Abbreviated name of the property value.
    pub abbreviation: String,
    /// "Long" name of the property value.
    pub long: String,
    /// Any further aliases for the value; empty when there are none.
    pub aliases: Vec<String>,
}
26
27impl UcdFile for PropertyValueAlias {
28 fn relative_file_path() -> &'static Path {
29 Path::new("PropertyValueAliases.txt")
30 }
31}
32
33impl FromStr for PropertyValueAlias {
34 type Err = Error;
35
36 fn from_str(line: &str) -> Result<PropertyValueAlias, Error> {
37 static PARTS: Lazy<Regex> = Lazy::new(|| {
38 Regex::new(
39 r"(?x)
40 ^
41 \s*(?P<prop>[^\s;]+)\s*;
42 \s*(?P<abbrev>[^\s;]+)\s*;
43 \s*(?P<long>[^\s;]+)\s*
44 (?:;(?P<aliases>.*))?
45 ",
46 )
47 .unwrap()
48 });
49 static PARTS_CCC: Lazy<Regex> = Lazy::new(|| {
50 Regex::new(
51 r"(?x)
52 ^
53 ccc;
54 \s*(?P<num_class>[0-9]+)\s*;
55 \s*(?P<abbrev>[^\s;]+)\s*;
56 \s*(?P<long>[^\s;]+)
57 ",
58 )
59 .unwrap()
60 });
61 static ALIASES: Lazy<Regex> = Lazy::new(|| {
62 Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap()
63 });
64
65 if line.starts_with("ccc;") {
66 let caps = match PARTS_CCC.captures(line.trim()) {
67 Some(caps) => caps,
68 None => {
69 return err!("invalid PropertyValueAliases (ccc) line")
70 }
71 };
72 let n = match caps["num_class"].parse() {
73 Ok(n) => n,
74 Err(err) => {
75 return err!(
76 "failed to parse ccc number '{}': {}",
77 &caps["num_class"],
78 err
79 )
80 }
81 };
82 let abbrev = caps.name("abbrev").unwrap().as_str();
83 let long = caps.name("long").unwrap().as_str();
84 return Ok(PropertyValueAlias {
85 property: line[0..3].to_string(),
86 numeric: Some(n),
87 abbreviation: abbrev.to_string(),
88 long: long.to_string(),
89 aliases: vec![],
90 });
91 }
92
93 let caps = match PARTS.captures(line.trim()) {
94 Some(caps) => caps,
95 None => return err!("invalid PropertyValueAliases line"),
96 };
97 let mut aliases = vec![];
98 if let Some(m) = caps.name("aliases") {
99 for acaps in ALIASES.captures_iter(m.as_str()) {
100 let alias = acaps.name("alias").unwrap().as_str();
101 if alias == "#" {
102 // This starts a comment, so stop reading.
103 break;
104 }
105 aliases.push(alias.to_string());
106 }
107 }
108 Ok(PropertyValueAlias {
109 property: caps.name("prop").unwrap().as_str().to_string(),
110 numeric: None,
111 abbreviation: caps.name("abbrev").unwrap().as_str().to_string(),
112 long: caps.name("long").unwrap().as_str().to_string(),
113 aliases,
114 })
115 }
116}
117
#[cfg(test)]
mod tests {
    use super::PropertyValueAlias;

    /// Parses `line` into a row, panicking if the parser rejects it.
    fn parsed(line: &str) -> PropertyValueAlias {
        line.parse().unwrap()
    }

    /// Builds the expected row from borrowed pieces.
    fn row(
        property: &str,
        numeric: Option<u8>,
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) -> PropertyValueAlias {
        PropertyValueAlias {
            property: property.to_string(),
            numeric,
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|a| a.to_string()).collect(),
        }
    }

    // A generic line carrying exactly one extra alias.
    #[test]
    fn parse1() {
        let got = parsed("blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A\n");
        let want = row(
            "blk",
            None,
            "Arabic_PF_A",
            "Arabic_Presentation_Forms_A",
            &["Arabic_Presentation_Forms-A"],
        );
        assert_eq!(got, want);
    }

    // A generic line carrying two extra aliases.
    #[test]
    fn parse2() {
        let got = parsed("AHex; N ; No ; F ; False\n");
        let want = row("AHex", None, "N", "No", &["F", "False"]);
        assert_eq!(got, want);
    }

    // A generic line with no extra aliases.
    #[test]
    fn parse3() {
        let got = parsed("age; 1.1 ; V1_1\n");
        let want = row("age", None, "1.1", "V1_1", &[]);
        assert_eq!(got, want);
    }

    // A `ccc` line: the numeric field is captured.
    #[test]
    fn parse4() {
        let got = parsed("ccc; 0; NR ; Not_Reordered\n");
        let want = row("ccc", Some(0), "NR", "Not_Reordered", &[]);
        assert_eq!(got, want);
    }

    // A `ccc` line followed by a trailing comment.
    #[test]
    fn parse5() {
        let got = parsed("ccc; 133; CCC133 ; CCC133 # RESERVED\n");
        let want = row("ccc", Some(133), "CCC133", "CCC133", &[]);
        assert_eq!(got, want);
    }

    // A generic line with one alias followed by a trailing comment.
    #[test]
    fn parse6() {
        let got = parsed("gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps\n");
        let want = row("gc", None, "P", "Punctuation", &["punct"]);
        assert_eq!(got, want);
    }
}
189