1use std::path::Path;
2use std::str::FromStr;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use crate::common::{CodepointIter, Codepoints, UcdFile, UcdFileByCodepoint};
8use crate::error::Error;
9
10/// A single row in the `extracted/DerivedNumericValues.txt` file.
11///
12/// This file gives the derived values of the Numeric_Value property.
13#[derive(Clone, Debug, Default, Eq, PartialEq)]
14pub struct DerivedNumericValues {
15 /// The codepoint or codepoint range for this entry.
16 pub codepoints: Codepoints,
17 /// The approximate Numeric_Value of the codepoints in this entry,
18 /// as a decimal.
19 pub numeric_value_decimal: String,
20 /// The exact Numeric_Value of the codepoints in this entry, as
21 /// a fraction.
22 pub numeric_value_fraction: String,
23}
24
25impl UcdFile for DerivedNumericValues {
26 fn relative_file_path() -> &'static Path {
27 Path::new("extracted/DerivedNumericValues.txt")
28 }
29}
30
31impl UcdFileByCodepoint for DerivedNumericValues {
32 fn codepoints(&self) -> CodepointIter {
33 self.codepoints.into_iter()
34 }
35}
36
37impl FromStr for DerivedNumericValues {
38 type Err = Error;
39
40 fn from_str(line: &str) -> Result<DerivedNumericValues, Error> {
41 static PARTS: Lazy<Regex> = Lazy::new(|| {
42 Regex::new(
43 r"(?x)
44 ^
45 \s*(?P<codepoints>[^\s;]+)\s*;
46 \s*(?P<numeric_value_decimal>[^\s;]+)\s*;
47 \s*;
48 \s*(?P<numeric_value_fraction>[^\s;]+)\s*
49 ",
50 )
51 .unwrap()
52 });
53
54 let caps = match PARTS.captures(line.trim()) {
55 Some(caps) => caps,
56 None => return err!("invalid PropList line: '{}'", line),
57 };
58 let codepoints = caps["codepoints"].parse()?;
59 let numeric_value_decimal = caps["numeric_value_decimal"].to_string();
60 let numeric_value_fraction =
61 caps["numeric_value_fraction"].to_string();
62
63 Ok(DerivedNumericValues {
64 codepoints,
65 numeric_value_decimal,
66 numeric_value_fraction,
67 })
68 }
69}
70
#[cfg(test)]
mod tests {
    use super::DerivedNumericValues;

    /// Parses `line` into a row, panicking if the line is rejected.
    fn parse_row(line: &str) -> DerivedNumericValues {
        line.parse::<DerivedNumericValues>().unwrap()
    }

    /// An entry covering a single codepoint with an integer value.
    #[test]
    fn parse_single() {
        let parsed = parse_row("0030 ; 0.0 ; ; 0 # Nd DIGIT ZERO\n");

        assert_eq!(parsed.codepoints, 0x0030);
        assert_eq!(parsed.numeric_value_decimal, "0.0");
        assert_eq!(parsed.numeric_value_fraction, "0");
    }

    /// An entry covering a codepoint range with a fractional value.
    #[test]
    fn parse_range() {
        let parsed = parse_row(
            "11FC9..11FCA ; 0.0625 ; ; 1/16 # No [2] TAMIL FRACTION ONE SIXTEENTH-1..TAMIL FRACTION ONE SIXTEENTH-2\n",
        );

        assert_eq!(parsed.codepoints, (0x11FC9, 0x11FCA));
        assert_eq!(parsed.numeric_value_decimal, "0.0625");
        assert_eq!(parsed.numeric_value_fraction, "1/16");
    }
}
93