1use std::path::Path;
2
3use crate::{
4 common::{
5 parse_codepoint_sequence, Codepoint, CodepointIter, UcdFile,
6 UcdFileByCodepoint,
7 },
8 error::Error,
9};
10
11/// A single row in the `SpecialCasing.txt` file.
12///
13/// Note that a single codepoint may be mapped multiple times. In particular,
14/// a single codepoint might have mappings based on distinct language sensitive
15/// conditions (e.g., `U+0307`).
16#[derive(Clone, Debug, Default, Eq, PartialEq)]
17pub struct SpecialCaseMapping {
18 /// The codepoint that is being mapped.
19 pub codepoint: Codepoint,
20 /// The lowercase mapping, which may be empty.
21 pub lowercase: Vec<Codepoint>,
22 /// The titlecase mapping, which may be empty.
23 pub titlecase: Vec<Codepoint>,
24 /// The uppercase mapping, which may be empty.
25 pub uppercase: Vec<Codepoint>,
26 /// A list of language specific conditions, see `SpecialCasing.txt` for
27 /// more details.
28 pub conditions: Vec<String>,
29}
30
31impl UcdFile for SpecialCaseMapping {
32 fn relative_file_path() -> &'static Path {
33 Path::new("SpecialCasing.txt")
34 }
35}
36
37impl UcdFileByCodepoint for SpecialCaseMapping {
38 fn codepoints(&self) -> CodepointIter {
39 self.codepoint.into_iter()
40 }
41}
42
43impl std::str::FromStr for SpecialCaseMapping {
44 type Err = Error;
45
46 fn from_str(line: &str) -> Result<SpecialCaseMapping, Error> {
47 let re_parts = regex!(
48 r"(?x)
49 ^
50 \s*(?P<codepoint>[^\s;]+)\s*;
51 \s*(?P<lower>[^;]+)\s*;
52 \s*(?P<title>[^;]+)\s*;
53 \s*(?P<upper>[^;]+)\s*;
54 \s*(?P<conditions>[^;\x23]+)?
55 ",
56 );
57
58 let caps = match re_parts.captures(line.trim()) {
59 Some(caps) => caps,
60 None => return err!("invalid SpecialCasing line: '{}'", line),
61 };
62 let conditions = caps
63 .name("conditions")
64 .map(|x| {
65 x.as_str()
66 .trim()
67 .split_whitespace()
68 .map(|c| c.to_string())
69 .collect()
70 })
71 .unwrap_or(vec![]);
72 Ok(SpecialCaseMapping {
73 codepoint: caps["codepoint"].parse()?,
74 lowercase: parse_codepoint_sequence(&caps["lower"])?,
75 titlecase: parse_codepoint_sequence(&caps["title"])?,
76 uppercase: parse_codepoint_sequence(&caps["upper"])?,
77 conditions,
78 })
79 }
80}
81
#[cfg(test)]
mod tests {
    use super::SpecialCaseMapping;

    /// A row without a condition list: the `#` comment directly follows the
    /// fourth field.
    #[test]
    fn parse_no_conds() {
        let input = "1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA\n";
        let parsed = input.parse::<SpecialCaseMapping>().unwrap();

        assert_eq!(parsed.codepoint, 0x1F52);
        assert_eq!(parsed.lowercase, [0x1F52]);
        assert_eq!(parsed.titlecase, [0x03A5, 0x0313, 0x0300]);
        assert_eq!(parsed.uppercase, [0x03A5, 0x0313, 0x0300]);
        assert!(parsed.conditions.is_empty());
    }

    /// A row with an empty lowercase mapping and two conditions.
    #[test]
    fn parse_conds() {
        let input = "0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE\n";
        let parsed = input.parse::<SpecialCaseMapping>().unwrap();

        assert_eq!(parsed.codepoint, 0x0307);
        assert!(parsed.lowercase.is_empty());
        assert_eq!(parsed.titlecase, [0x0307]);
        assert_eq!(parsed.uppercase, [0x0307]);
        assert_eq!(parsed.conditions, ["tr", "After_I"]);
    }
}
108