1use std::path::Path;
2use std::str::FromStr;
3
4use crate::common::{
5 parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
6 UcdFileByCodepoint,
7};
8use crate::error::Error;
9
10/// A single row in the `ScriptExtensions.txt` file.
11#[derive(Clone, Debug, Default, Eq, PartialEq)]
12pub struct ScriptExtension {
13 /// The codepoint or codepoint range for this entry.
14 pub codepoints: Codepoints,
15 /// The script extension names assigned to the codepoints in this entry.
16 pub scripts: Vec<String>,
17}
18
19impl UcdFile for ScriptExtension {
20 fn relative_file_path() -> &'static Path {
21 Path::new("ScriptExtensions.txt")
22 }
23}
24
25impl UcdFileByCodepoint for ScriptExtension {
26 fn codepoints(&self) -> CodepointIter {
27 self.codepoints.into_iter()
28 }
29}
30
31impl FromStr for ScriptExtension {
32 type Err = Error;
33
34 fn from_str(line: &str) -> Result<ScriptExtension, Error> {
35 let (codepoints: Codepoints, scripts: &str) = parse_codepoint_association(line)?;
36 Ok(ScriptExtension {
37 codepoints,
38 scripts: scripts.split_whitespace().map(str::to_string).collect(),
39 })
40 }
41}
42
#[cfg(test)]
mod tests {
    use super::ScriptExtension;

    /// A row that names exactly one codepoint.
    #[test]
    fn parse_single() {
        let input = "060C ; Arab Syrc Thaa # Po ARABIC COMMA\n";
        let parsed = input.parse::<ScriptExtension>().unwrap();

        assert_eq!(parsed.codepoints, 0x060C);
        // Only the three script names may end up in `scripts`; the trailing
        // `#` comment must not contribute any entries.
        assert_eq!(parsed.scripts, vec!["Arab", "Syrc", "Thaa"]);
    }

    /// A row that names an inclusive `start..end` codepoint range.
    #[test]
    fn parse_range() {
        let input = "A836..A837 ; Deva Gujr Guru Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK\n";
        let parsed = input.parse::<ScriptExtension>().unwrap();

        assert_eq!(parsed.codepoints, (0xA836, 0xA837));

        let expected_scripts = vec![
            "Deva", "Gujr", "Guru", "Kthi", "Mahj", "Modi", "Sind", "Takr",
            "Tirh",
        ];
        assert_eq!(parsed.scripts, expected_scripts);
    }
}
69