1use std::path::{Path, PathBuf};
2use std::str::FromStr;
3
4use crate::common::{
5 parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
6 UcdFileByCodepoint,
7};
8use crate::error::Error;
9
10/// A single row in the `emoji-data.txt` file.
11///
12/// The `emoji-data.txt` file is the source of truth on several Emoji-related
13/// Unicode properties.
14///
15/// Note that `emoji-data.txt` is not formally part of the Unicode Character
16/// Database. You can download the Emoji data files separately here:
17/// https://unicode.org/Public/emoji/
18#[derive(Clone, Debug, Default, Eq, PartialEq)]
19pub struct EmojiProperty {
20 /// The codepoint or codepoint range for this entry.
21 pub codepoints: Codepoints,
22 /// The property name assigned to the codepoints in this entry.
23 pub property: String,
24}
25
26impl UcdFile for EmojiProperty {
27 fn relative_file_path() -> &'static Path {
28 Path::new("emoji/emoji-data.txt")
29 }
30
31 fn file_path<P: AsRef<Path>>(ucd_dir: P) -> PathBuf {
32 let ucd_dir = ucd_dir.as_ref();
33 // The standard location, but only on UCDs from 13.0.0 and up.
34 let std = ucd_dir.join(Self::relative_file_path());
35 if std.exists() {
36 std
37 } else {
38 // If the old location does exist, use it.
39 let legacy = ucd_dir.join("emoji-data.txt");
40 if legacy.exists() {
41 legacy
42 } else {
43 // This might end up in an error message, so use the standard
44 // one if forced to choose. Arguably we could do something like
45 // peek
46 std
47 }
48 }
49 }
50}
51
52impl UcdFileByCodepoint for EmojiProperty {
53 fn codepoints(&self) -> CodepointIter {
54 self.codepoints.into_iter()
55 }
56}
57
58impl FromStr for EmojiProperty {
59 type Err = Error;
60
61 fn from_str(line: &str) -> Result<EmojiProperty, Error> {
62 let (codepoints: Codepoints, property: &str) = parse_codepoint_association(line)?;
63 Ok(EmojiProperty { codepoints, property: property.to_string() })
64 }
65}
66
#[cfg(test)]
mod tests {
    use super::EmojiProperty;

    /// A line naming a single codepoint yields that codepoint and its
    /// property name.
    #[test]
    fn parse_single() {
        let input = "24C2 ; Emoji # 1.1 [1] (Ⓜ️) circled M\n";
        let parsed = input.parse::<EmojiProperty>().unwrap();
        assert_eq!(parsed.codepoints, 0x24C2);
        assert_eq!(parsed.property, "Emoji");
    }

    /// A line naming a codepoint range yields both endpoints and the
    /// property name.
    #[test]
    fn parse_range() {
        let input = "1FA6E..1FFFD ; Extended_Pictographic# NA[1424] (🩮️..🿽️) <reserved-1FA6E>..<reserved-1FFFD>\n";
        let parsed = input.parse::<EmojiProperty>().unwrap();
        assert_eq!(parsed.codepoints, (0x1FA6E, 0x1FFFD));
        assert_eq!(parsed.property, "Extended_Pictographic");
    }
}
87