| 1 | use std::path::Path; |
| 2 | |
| 3 | use crate::{common::UcdFile, error::Error}; |
| 4 | |
/// One parsed data row of `PropertyValueAliases.txt`.
///
/// Each row names a property and gives the spellings by which one of its
/// values may be written.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PropertyValueAlias {
    /// Name of the property this value belongs to, as written in the row's
    /// first field.
    pub property: String,
    /// Numeric form of the value, when the row carries one. The parser in
    /// this file only fills this in for rows of the `ccc`
    /// (`Canonical_Combining_Class`) property; it is `None` otherwise.
    pub numeric: Option<u8>,
    /// Abbreviated spelling of the property value.
    pub abbreviation: String,
    /// Long ("full") spelling of the property value.
    pub long: String,
    /// Any further spellings listed after the long form; empty when the row
    /// has none.
    pub aliases: Vec<String>,
}
| 21 | |
| 22 | impl UcdFile for PropertyValueAlias { |
| 23 | fn relative_file_path() -> &'static Path { |
| 24 | Path::new("PropertyValueAliases.txt" ) |
| 25 | } |
| 26 | } |
| 27 | |
| 28 | impl std::str::FromStr for PropertyValueAlias { |
| 29 | type Err = Error; |
| 30 | |
| 31 | fn from_str(line: &str) -> Result<PropertyValueAlias, Error> { |
| 32 | let re_parts = regex!( |
| 33 | r"(?x) |
| 34 | ^ |
| 35 | \s*(?P<prop>[^\s;]+)\s*; |
| 36 | \s*(?P<abbrev>[^\s;]+)\s*; |
| 37 | \s*(?P<long>[^\s;]+)\s* |
| 38 | (?:;(?P<aliases>.*))? |
| 39 | " , |
| 40 | ); |
| 41 | let re_parts_ccc = regex!( |
| 42 | r"(?x) |
| 43 | ^ |
| 44 | ccc; |
| 45 | \s*(?P<num_class>[0-9]+)\s*; |
| 46 | \s*(?P<abbrev>[^\s;]+)\s*; |
| 47 | \s*(?P<long>[^\s;]+) |
| 48 | " , |
| 49 | ); |
| 50 | let re_aliases = regex!(r"\s*(?P<alias>[^\s;]+)\s*;?\s*" ); |
| 51 | |
| 52 | if line.starts_with("ccc;" ) { |
| 53 | let caps = match re_parts_ccc.captures(line.trim()) { |
| 54 | Some(caps) => caps, |
| 55 | None => { |
| 56 | return err!("invalid PropertyValueAliases (ccc) line" ) |
| 57 | } |
| 58 | }; |
| 59 | let n = match caps["num_class" ].parse() { |
| 60 | Ok(n) => n, |
| 61 | Err(err) => { |
| 62 | return err!( |
| 63 | "failed to parse ccc number ' {}': {}" , |
| 64 | &caps["num_class" ], |
| 65 | err |
| 66 | ) |
| 67 | } |
| 68 | }; |
| 69 | let abbrev = caps.name("abbrev" ).unwrap().as_str(); |
| 70 | let long = caps.name("long" ).unwrap().as_str(); |
| 71 | return Ok(PropertyValueAlias { |
| 72 | property: line[0..3].to_string(), |
| 73 | numeric: Some(n), |
| 74 | abbreviation: abbrev.to_string(), |
| 75 | long: long.to_string(), |
| 76 | aliases: vec![], |
| 77 | }); |
| 78 | } |
| 79 | |
| 80 | let caps = match re_parts.captures(line.trim()) { |
| 81 | Some(caps) => caps, |
| 82 | None => return err!("invalid PropertyValueAliases line" ), |
| 83 | }; |
| 84 | let mut aliases = vec![]; |
| 85 | if let Some(m) = caps.name("aliases" ) { |
| 86 | for acaps in re_aliases.captures_iter(m.as_str()) { |
| 87 | let alias = acaps.name("alias" ).unwrap().as_str(); |
| 88 | if alias == "#" { |
| 89 | // This starts a comment, so stop reading. |
| 90 | break; |
| 91 | } |
| 92 | aliases.push(alias.to_string()); |
| 93 | } |
| 94 | } |
| 95 | Ok(PropertyValueAlias { |
| 96 | property: caps.name("prop" ).unwrap().as_str().to_string(), |
| 97 | numeric: None, |
| 98 | abbreviation: caps.name("abbrev" ).unwrap().as_str().to_string(), |
| 99 | long: caps.name("long" ).unwrap().as_str().to_string(), |
| 100 | aliases, |
| 101 | }) |
| 102 | } |
| 103 | } |
| 104 | |
#[cfg(test)]
mod tests {
    use super::PropertyValueAlias;

    /// Parses `line` into a row, panicking if the parser rejects it.
    fn parse(line: &str) -> PropertyValueAlias {
        line.parse().unwrap()
    }

    /// Builds the row a test expects from borrowed field values.
    fn row(
        property: &str,
        numeric: Option<u8>,
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) -> PropertyValueAlias {
        PropertyValueAlias {
            property: property.to_string(),
            numeric,
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|alias| alias.to_string()).collect(),
        }
    }

    /// A row with exactly one extra alias.
    #[test]
    fn parse1() {
        assert_eq!(
            parse("blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A \n"),
            row(
                "blk",
                None,
                "Arabic_PF_A",
                "Arabic_Presentation_Forms_A",
                &["Arabic_Presentation_Forms-A"],
            ),
        );
    }

    /// A row with several extra aliases.
    #[test]
    fn parse2() {
        assert_eq!(
            parse("AHex; N ; No ; F ; False \n"),
            row("AHex", None, "N", "No", &["F", "False"]),
        );
    }

    /// A row with no extra aliases, whose abbreviation looks numeric.
    #[test]
    fn parse3() {
        assert_eq!(
            parse("age; 1.1 ; V1_1 \n"),
            row("age", None, "1.1", "V1_1", &[]),
        );
    }

    /// A `ccc` row, which carries a numeric field.
    #[test]
    fn parse4() {
        assert_eq!(
            parse("ccc; 0; NR ; Not_Reordered \n"),
            row("ccc", Some(0), "NR", "Not_Reordered", &[]),
        );
    }

    /// A `ccc` row followed by a trailing comment.
    #[test]
    fn parse5() {
        assert_eq!(
            parse("ccc; 133; CCC133 ; CCC133 # RESERVED \n"),
            row("ccc", Some(133), "CCC133", "CCC133", &[]),
        );
    }

    /// A non-`ccc` row with an alias followed by a trailing comment.
    #[test]
    fn parse6() {
        assert_eq!(
            parse("gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps \n"),
            row("gc", None, "P", "Punctuation", &["punct"]),
        );
    }
}
| 176 | |