| 1 | use std::path::Path; |
| 2 | |
| 3 | use crate::{ |
| 4 | common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}, |
| 5 | error::Error, |
| 6 | }; |
| 7 | |
| 8 | /// A single row in the `NameAliases.txt` file. |
| 9 | /// |
| 10 | /// Note that there are multiple rows for some codepoint. Each row provides a |
| 11 | /// new alias. |
| 12 | #[derive (Clone, Debug, Default, Eq, PartialEq)] |
| 13 | pub struct NameAlias { |
| 14 | /// The codepoint corresponding to this row. |
| 15 | pub codepoint: Codepoint, |
| 16 | /// The alias. |
| 17 | pub alias: String, |
| 18 | /// The label of this alias. |
| 19 | pub label: NameAliasLabel, |
| 20 | } |
| 21 | |
| 22 | impl UcdFile for NameAlias { |
| 23 | fn relative_file_path() -> &'static Path { |
| 24 | Path::new("NameAliases.txt" ) |
| 25 | } |
| 26 | } |
| 27 | |
| 28 | impl UcdFileByCodepoint for NameAlias { |
| 29 | fn codepoints(&self) -> CodepointIter { |
| 30 | self.codepoint.into_iter() |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | impl std::str::FromStr for NameAlias { |
| 35 | type Err = Error; |
| 36 | |
| 37 | fn from_str(line: &str) -> Result<NameAlias, Error> { |
| 38 | let re_parts = regex!( |
| 39 | r"(?x) |
| 40 | ^ |
| 41 | (?P<codepoint>[A-Z0-9]+); |
| 42 | \s* |
| 43 | (?P<alias>[^;]+); |
| 44 | \s* |
| 45 | (?P<label>\S+) |
| 46 | " , |
| 47 | ); |
| 48 | |
| 49 | let caps = match re_parts.captures(line.trim()) { |
| 50 | Some(caps) => caps, |
| 51 | None => return err!("invalid NameAliases line" ), |
| 52 | }; |
| 53 | Ok(NameAlias { |
| 54 | codepoint: caps["codepoint" ].parse()?, |
| 55 | alias: caps.name("alias" ).unwrap().as_str().to_string(), |
| 56 | label: caps["label" ].parse()?, |
| 57 | }) |
| 58 | } |
| 59 | } |
| 60 | |
/// The kind of a name alias, as given by the label field of a row.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NameAliasLabel {
    /// A correction for a serious problem in a character name.
    Correction,
    /// An ISO 6429 name for a C0 or C1 control function, or another
    /// commonly occurring name for a control code.
    Control,
    /// One of a few widely used alternate names for format characters.
    Alternate,
    /// A documented label for a C1 control code point that was never
    /// actually approved in any standard.
    Figment,
    /// A commonly occurring abbreviation (or acronym) for a control code,
    /// format character, space or variation selector.
    Abbreviation,
}
| 78 | |
| 79 | impl Default for NameAliasLabel { |
| 80 | fn default() -> NameAliasLabel { |
| 81 | // This is arbitrary, but the Default impl is convenient. |
| 82 | NameAliasLabel::Correction |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | impl std::str::FromStr for NameAliasLabel { |
| 87 | type Err = Error; |
| 88 | |
| 89 | fn from_str(s: &str) -> Result<NameAliasLabel, Error> { |
| 90 | match s { |
| 91 | "correction" => Ok(NameAliasLabel::Correction), |
| 92 | "control" => Ok(NameAliasLabel::Control), |
| 93 | "alternate" => Ok(NameAliasLabel::Alternate), |
| 94 | "figment" => Ok(NameAliasLabel::Figment), |
| 95 | "abbreviation" => Ok(NameAliasLabel::Abbreviation), |
| 96 | unknown: &str => err!("unknown name alias label: ' {}'" , unknown), |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
#[cfg(test)]
mod tests {
    use super::{NameAlias, NameAliasLabel};

    /// A `control` alias on the lowest codepoint.
    #[test]
    fn parse1() {
        let NameAlias { codepoint, alias, label } =
            "0000;NULL;control \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0x0);
        assert_eq!(alias, "NULL");
        assert_eq!(label, NameAliasLabel::Control);
    }

    /// An alias that contains a space.
    #[test]
    fn parse2() {
        let NameAlias { codepoint, alias, label } =
            "000B;VERTICAL TABULATION;control \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0xB);
        assert_eq!(alias, "VERTICAL TABULATION");
        assert_eq!(label, NameAliasLabel::Control);
    }

    /// A `figment` alias.
    #[test]
    fn parse3() {
        let NameAlias { codepoint, alias, label } =
            "0081;HIGH OCTET PRESET;figment \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0x81);
        assert_eq!(alias, "HIGH OCTET PRESET");
        assert_eq!(label, NameAliasLabel::Figment);
    }

    /// An `abbreviation` alias on a five-digit codepoint.
    #[test]
    fn parse4() {
        let NameAlias { codepoint, alias, label } =
            "E01EF;VS256;abbreviation \n".parse::<NameAlias>().unwrap();
        assert_eq!(codepoint, 0xE01EF);
        assert_eq!(alias, "VS256");
        assert_eq!(label, NameAliasLabel::Abbreviation);
    }
}
| 141 | |