1 | use std::path::Path; |
2 | use std::str::FromStr; |
3 | |
4 | use once_cell::sync::Lazy; |
5 | use regex::Regex; |
6 | |
7 | use crate::common::{CodepointIter, Codepoints, UcdFile, UcdFileByCodepoint}; |
8 | use crate::error::Error; |
9 | |
10 | /// A single row in the `extracted/DerivedNumericValues.txt` file. |
11 | /// |
12 | /// This file gives the derived values of the Numeric_Value property. |
13 | #[derive (Clone, Debug, Default, Eq, PartialEq)] |
14 | pub struct DerivedNumericValues { |
15 | /// The codepoint or codepoint range for this entry. |
16 | pub codepoints: Codepoints, |
17 | /// The approximate Numeric_Value of the codepoints in this entry, |
18 | /// as a decimal. |
19 | pub numeric_value_decimal: String, |
20 | /// The exact Numeric_Value of the codepoints in this entry, as |
21 | /// a fraction. |
22 | pub numeric_value_fraction: String, |
23 | } |
24 | |
25 | impl UcdFile for DerivedNumericValues { |
26 | fn relative_file_path() -> &'static Path { |
27 | Path::new("extracted/DerivedNumericValues.txt" ) |
28 | } |
29 | } |
30 | |
31 | impl UcdFileByCodepoint for DerivedNumericValues { |
32 | fn codepoints(&self) -> CodepointIter { |
33 | self.codepoints.into_iter() |
34 | } |
35 | } |
36 | |
37 | impl FromStr for DerivedNumericValues { |
38 | type Err = Error; |
39 | |
40 | fn from_str(line: &str) -> Result<DerivedNumericValues, Error> { |
41 | static PARTS: Lazy<Regex> = Lazy::new(|| { |
42 | Regex::new( |
43 | r"(?x) |
44 | ^ |
45 | \s*(?P<codepoints>[^\s;]+)\s*; |
46 | \s*(?P<numeric_value_decimal>[^\s;]+)\s*; |
47 | \s*; |
48 | \s*(?P<numeric_value_fraction>[^\s;]+)\s* |
49 | " , |
50 | ) |
51 | .unwrap() |
52 | }); |
53 | |
54 | let caps = match PARTS.captures(line.trim()) { |
55 | Some(caps) => caps, |
56 | None => return err!("invalid PropList line: ' {}'" , line), |
57 | }; |
58 | let codepoints = caps["codepoints" ].parse()?; |
59 | let numeric_value_decimal = caps["numeric_value_decimal" ].to_string(); |
60 | let numeric_value_fraction = |
61 | caps["numeric_value_fraction" ].to_string(); |
62 | |
63 | Ok(DerivedNumericValues { |
64 | codepoints, |
65 | numeric_value_decimal, |
66 | numeric_value_fraction, |
67 | }) |
68 | } |
69 | } |
70 | |
#[cfg(test)]
mod tests {
    use super::DerivedNumericValues;

    /// An entry covering exactly one codepoint.
    #[test]
    fn parse_single() {
        let input = "0030 ; 0.0 ; ; 0 # Nd DIGIT ZERO \n";
        let parsed = input.parse::<DerivedNumericValues>().unwrap();
        assert_eq!(parsed.codepoints, 0x0030);
        assert_eq!(parsed.numeric_value_decimal, "0.0");
        assert_eq!(parsed.numeric_value_fraction, "0");
    }

    /// An entry covering a `start..end` codepoint range.
    #[test]
    fn parse_range() {
        let input = "11FC9..11FCA ; 0.0625 ; ; 1/16 # No [2] TAMIL FRACTION ONE SIXTEENTH-1..TAMIL FRACTION ONE SIXTEENTH-2 \n";
        let parsed = input.parse::<DerivedNumericValues>().unwrap();
        assert_eq!(parsed.codepoints, (0x11FC9, 0x11FCA));
        assert_eq!(parsed.numeric_value_decimal, "0.0625");
        assert_eq!(parsed.numeric_value_fraction, "1/16");
    }
}
93 | |