1 | use std::path::Path; |
2 | |
3 | use crate::{common::UcdFile, error::Error}; |
4 | |
/// One parsed data line of `PropertyValueAliases.txt`.
///
/// Each row ties a property to the names by which one of its values may be
/// written: a short abbreviation, a long name, and any number of extra
/// aliases.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PropertyValueAlias {
    /// Name of the property this value belongs to, taken from the first
    /// field of the row.
    pub property: String,
    /// Numeric form of the value, when the row has one. The parser in this
    /// file only fills it in for rows of the `ccc`
    /// (`Canonical_Combining_Class`) property; it is `None` otherwise.
    pub numeric: Option<u8>,
    /// Short name of the property value.
    pub abbreviation: String,
    /// Long name of the property value.
    pub long: String,
    /// Any further names listed after the long name; empty when the row has
    /// none.
    pub aliases: Vec<String>,
}
21 | |
22 | impl UcdFile for PropertyValueAlias { |
23 | fn relative_file_path() -> &'static Path { |
24 | Path::new("PropertyValueAliases.txt" ) |
25 | } |
26 | } |
27 | |
28 | impl std::str::FromStr for PropertyValueAlias { |
29 | type Err = Error; |
30 | |
31 | fn from_str(line: &str) -> Result<PropertyValueAlias, Error> { |
32 | let re_parts = regex!( |
33 | r"(?x) |
34 | ^ |
35 | \s*(?P<prop>[^\s;]+)\s*; |
36 | \s*(?P<abbrev>[^\s;]+)\s*; |
37 | \s*(?P<long>[^\s;]+)\s* |
38 | (?:;(?P<aliases>.*))? |
39 | " , |
40 | ); |
41 | let re_parts_ccc = regex!( |
42 | r"(?x) |
43 | ^ |
44 | ccc; |
45 | \s*(?P<num_class>[0-9]+)\s*; |
46 | \s*(?P<abbrev>[^\s;]+)\s*; |
47 | \s*(?P<long>[^\s;]+) |
48 | " , |
49 | ); |
50 | let re_aliases = regex!(r"\s*(?P<alias>[^\s;]+)\s*;?\s*" ); |
51 | |
52 | if line.starts_with("ccc;" ) { |
53 | let caps = match re_parts_ccc.captures(line.trim()) { |
54 | Some(caps) => caps, |
55 | None => { |
56 | return err!("invalid PropertyValueAliases (ccc) line" ) |
57 | } |
58 | }; |
59 | let n = match caps["num_class" ].parse() { |
60 | Ok(n) => n, |
61 | Err(err) => { |
62 | return err!( |
63 | "failed to parse ccc number ' {}': {}" , |
64 | &caps["num_class" ], |
65 | err |
66 | ) |
67 | } |
68 | }; |
69 | let abbrev = caps.name("abbrev" ).unwrap().as_str(); |
70 | let long = caps.name("long" ).unwrap().as_str(); |
71 | return Ok(PropertyValueAlias { |
72 | property: line[0..3].to_string(), |
73 | numeric: Some(n), |
74 | abbreviation: abbrev.to_string(), |
75 | long: long.to_string(), |
76 | aliases: vec![], |
77 | }); |
78 | } |
79 | |
80 | let caps = match re_parts.captures(line.trim()) { |
81 | Some(caps) => caps, |
82 | None => return err!("invalid PropertyValueAliases line" ), |
83 | }; |
84 | let mut aliases = vec![]; |
85 | if let Some(m) = caps.name("aliases" ) { |
86 | for acaps in re_aliases.captures_iter(m.as_str()) { |
87 | let alias = acaps.name("alias" ).unwrap().as_str(); |
88 | if alias == "#" { |
89 | // This starts a comment, so stop reading. |
90 | break; |
91 | } |
92 | aliases.push(alias.to_string()); |
93 | } |
94 | } |
95 | Ok(PropertyValueAlias { |
96 | property: caps.name("prop" ).unwrap().as_str().to_string(), |
97 | numeric: None, |
98 | abbreviation: caps.name("abbrev" ).unwrap().as_str().to_string(), |
99 | long: caps.name("long" ).unwrap().as_str().to_string(), |
100 | aliases, |
101 | }) |
102 | } |
103 | } |
104 | |
#[cfg(test)]
mod tests {
    use super::PropertyValueAlias;

    /// Parses `line` (panicking if parsing fails) and compares every field
    /// of the resulting row with the expected values.
    fn check(
        line: &str,
        property: &str,
        numeric: Option<u8>,
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) {
        let row: PropertyValueAlias = line.parse().unwrap();
        assert_eq!(row.property, property);
        assert_eq!(row.numeric, numeric);
        assert_eq!(row.abbreviation, abbreviation);
        assert_eq!(row.long, long);
        assert_eq!(row.aliases, aliases);
    }

    // Generic row with a single extra alias.
    #[test]
    fn parse1() {
        check(
            "blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A \n",
            "blk",
            None,
            "Arabic_PF_A",
            "Arabic_Presentation_Forms_A",
            &["Arabic_Presentation_Forms-A"],
        );
    }

    // Generic row with two extra aliases.
    #[test]
    fn parse2() {
        check(
            "AHex; N ; No ; F ; False \n",
            "AHex",
            None,
            "N",
            "No",
            &["F", "False"],
        );
    }

    // Generic row without any extra alias.
    #[test]
    fn parse3() {
        check("age; 1.1 ; V1_1 \n", "age", None, "1.1", "V1_1", &[]);
    }

    // `ccc` row: the second field is the numeric value.
    #[test]
    fn parse4() {
        check(
            "ccc; 0; NR ; Not_Reordered \n",
            "ccc",
            Some(0),
            "NR",
            "Not_Reordered",
            &[],
        );
    }

    // `ccc` row followed by a trailing comment.
    #[test]
    fn parse5() {
        check(
            "ccc; 133; CCC133 ; CCC133 # RESERVED \n",
            "ccc",
            Some(133),
            "CCC133",
            "CCC133",
            &[],
        );
    }

    // Generic row whose alias list is followed by a trailing comment.
    #[test]
    fn parse6() {
        check(
            "gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps \n",
            "gc",
            None,
            "P",
            "Punctuation",
            &["punct"],
        );
    }
}
176 | |