1use std::path::Path;
2use std::str::FromStr;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use crate::common::UcdFile;
8use crate::error::Error;
9
/// One parsed data row of the `PropertyAliases.txt` file.
///
/// A row holds an abbreviation, a long name and zero or more additional
/// aliases, in the order the fields appear on the line.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PropertyAlias {
    /// The abbreviated name of the property (first field of the row).
    pub abbreviation: String,
    /// The "long" name of the property (second field of the row).
    pub long: String,
    /// Any further aliases listed after the long name; empty when the row
    /// has none.
    pub aliases: Vec<String>,
}
20
21impl UcdFile for PropertyAlias {
22 fn relative_file_path() -> &'static Path {
23 Path::new("PropertyAliases.txt")
24 }
25}
26
27impl FromStr for PropertyAlias {
28 type Err = Error;
29
30 fn from_str(line: &str) -> Result<PropertyAlias, Error> {
31 static PARTS: Lazy<Regex> = Lazy::new(|| {
32 Regex::new(
33 r"(?x)
34 ^
35 \s*(?P<abbrev>[^\s;]+)\s*;
36 \s*(?P<long>[^\s;]+)\s*
37 (?:;(?P<aliases>.*))?
38 ",
39 )
40 .unwrap()
41 });
42 static ALIASES: Lazy<Regex> = Lazy::new(|| {
43 Regex::new(r"\s*(?P<alias>[^\s;]+)\s*;?\s*").unwrap()
44 });
45
46 let caps = match PARTS.captures(line.trim()) {
47 Some(caps) => caps,
48 None => return err!("invalid PropertyAliases line: '{}'", line),
49 };
50 let mut aliases = vec![];
51 if let Some(m) = caps.name("aliases") {
52 for acaps in ALIASES.captures_iter(m.as_str()) {
53 let alias = acaps.name("alias").unwrap().as_str();
54 aliases.push(alias.to_string());
55 }
56 }
57 Ok(PropertyAlias {
58 abbreviation: caps.name("abbrev").unwrap().as_str().to_string(),
59 long: caps.name("long").unwrap().as_str().to_string(),
60 aliases,
61 })
62 }
63}
64
#[cfg(test)]
mod tests {
    use super::PropertyAlias;

    /// Parses `line` into a row, panicking if the parser rejects it.
    fn parsed(line: &str) -> PropertyAlias {
        line.parse().unwrap()
    }

    /// Builds the row a test expects to get back from the parser.
    fn expected(abbreviation: &str, long: &str, aliases: &[&str]) -> PropertyAlias {
        PropertyAlias {
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|alias| alias.to_string()).collect(),
        }
    }

    // Two fields, no additional aliases, trailing newline.
    #[test]
    fn parse1() {
        assert_eq!(
            parsed("cjkAccountingNumeric ; kAccountingNumeric\n"),
            expected("cjkAccountingNumeric", "kAccountingNumeric", &[]),
        );
    }

    // Short abbreviation with an underscore-separated long name.
    #[test]
    fn parse2() {
        assert_eq!(
            parsed("nv ; Numeric_Value\n"),
            expected("nv", "Numeric_Value", &[]),
        );
    }

    // Exactly one additional alias.
    #[test]
    fn parse3() {
        assert_eq!(
            parsed("scf ; Simple_Case_Folding ; sfc\n"),
            expected("scf", "Simple_Case_Folding", &["sfc"]),
        );
    }

    // Two additional aliases, the first one followed directly by `;`.
    #[test]
    fn parse4() {
        assert_eq!(
            parsed("cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS\n"),
            expected("cjkRSUnicode", "kRSUnicode", &["Unicode_Radical_Stroke", "URS"]),
        );
    }

    // No trailing newline.
    #[test]
    fn parse5() {
        assert_eq!(
            parsed("isc ; ISO_Comment"),
            expected("isc", "ISO_Comment", &[]),
        );
    }
}
115