1use std::path::Path;
2use std::str::FromStr;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint};
8use crate::error::Error;
9
10/// A single row in the `Jamo.txt` file.
11///
12/// The `Jamo.txt` file defines the `Jamo_Short_Name` property.
13#[derive(Clone, Debug, Default, Eq, PartialEq)]
14pub struct JamoShortName {
15 /// The codepoint corresponding to this row.
16 pub codepoint: Codepoint,
17 /// The actual "Jamo Short Name." This string contains at most 3 bytes and
18 /// may be empty.
19 pub name: String,
20}
21
22impl UcdFile for JamoShortName {
23 fn relative_file_path() -> &'static Path {
24 Path::new("Jamo.txt")
25 }
26}
27
28impl UcdFileByCodepoint for JamoShortName {
29 fn codepoints(&self) -> CodepointIter {
30 self.codepoint.into_iter()
31 }
32}
33
34impl FromStr for JamoShortName {
35 type Err = Error;
36
37 fn from_str(line: &str) -> Result<JamoShortName, Error> {
38 static PARTS: Lazy<Regex> = Lazy::new(|| {
39 Regex::new(
40 r"(?x)
41 ^
42 (?P<codepoint>[A-Z0-9]+);
43 \s*
44 (?P<name>[A-Z]*)
45 ",
46 )
47 .unwrap()
48 });
49
50 let caps = match PARTS.captures(line.trim()) {
51 Some(caps) => caps,
52 None => return err!("invalid Jamo_Short_name line"),
53 };
54 Ok(JamoShortName {
55 codepoint: caps["codepoint"].parse()?,
56 name: caps.name("name").unwrap().as_str().to_string(),
57 })
58 }
59}
60
#[cfg(test)]
mod tests {
    use super::JamoShortName;

    /// A line carrying a non-empty short name followed by a comment.
    #[test]
    fn parse1() {
        let row = "1164; YAE # HANGUL JUNGSEONG YAE\n"
            .parse::<JamoShortName>()
            .unwrap();
        assert_eq!(row.codepoint, 0x1164);
        assert_eq!(row.name, "YAE");
    }

    /// A line whose short name is empty: only the codepoint and a comment.
    #[test]
    fn parse2() {
        let row = "110B; # HANGUL CHOSEONG IEUNG\n"
            .parse::<JamoShortName>()
            .unwrap();
        assert_eq!(row.codepoint, 0x110B);
        assert_eq!(row.name, "");
    }
}
81