1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::lazy_automaton::LazyAutomaton;
6use crate::provider::*;
7use crate::ListLength;
8#[cfg(feature = "datagen")]
9use alloc::borrow::Cow;
10#[cfg(feature = "datagen")]
11use icu_provider::DataError;
12use writeable::{LengthHint, Writeable};
13
14impl<'data> ListFormatterPatternsV1<'data> {
15 /// Creates a new [`ListFormatterPatternsV1`] from the given patterns. Fails if any pattern is invalid.
16 #[cfg(feature = "datagen")]
17 pub fn try_new(
18 [start, middle, end, pair, short_start, short_middle, short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair]: [&str; 12],
19 ) -> Result<Self, DataError> {
20 Ok(Self([
21 ListJoinerPattern::from_str(start, true, false)?.into(),
22 ListJoinerPattern::from_str(middle, false, false)?.into(),
23 ListJoinerPattern::from_str(end, false, true)?.into(),
24 ListJoinerPattern::from_str(pair, true, true)?.into(),
25 ListJoinerPattern::from_str(short_start, true, false)?.into(),
26 ListJoinerPattern::from_str(short_middle, false, false)?.into(),
27 ListJoinerPattern::from_str(short_end, false, true)?.into(),
28 ListJoinerPattern::from_str(short_pair, true, true)?.into(),
29 ListJoinerPattern::from_str(narrow_start, true, false)?.into(),
30 ListJoinerPattern::from_str(narrow_middle, false, false)?.into(),
31 ListJoinerPattern::from_str(narrow_end, false, true)?.into(),
32 ListJoinerPattern::from_str(narrow_pair, true, true)?.into(),
33 ]))
34 }
35
36 /// Adds a special case to all `pattern`s that will evaluate to
37 /// `alternative_pattern` when `regex` matches the following element.
38 /// The regex is interpreted case-insensitive and anchored to the beginning, but
39 /// to improve efficiency does not search for full matches. If a full match is
40 /// required, use `$`.
41 #[cfg(feature = "datagen")]
42 pub fn make_conditional(
43 &mut self,
44 pattern: &str,
45 regex: &SerdeDFA<'static>,
46 alternative_pattern: &str,
47 ) -> Result<(), DataError> {
48 let old = ListJoinerPattern::from_str(pattern, true, true)?;
49 for i in 0..12 {
50 #[allow(clippy::indexing_slicing)] // self.0 is &[_; 12]
51 if self.0[i].default == old {
52 self.0[i].special_case = Some(SpecialCasePattern {
53 condition: regex.clone(),
54 pattern: ListJoinerPattern::from_str(
55 alternative_pattern,
56 i % 4 == 0 || i % 4 == 3, // allow_prefix = start or pair
57 i % 4 == 2 || i % 4 == 3, // allow_suffix = end or pair
58 )?,
59 });
60 }
61 }
62 Ok(())
63 }
64
65 /// The range of the number of bytes required by the list literals to join a
66 /// list of length `len`. If none of the patterns are conditional, this is exact.
67 pub(crate) fn size_hint(&self, style: ListLength, len: usize) -> LengthHint {
68 match len {
69 0 | 1 => LengthHint::exact(0),
70 2 => self.pair(style).size_hint(),
71 n => {
72 self.start(style).size_hint()
73 + self.middle(style).size_hint() * (n - 3)
74 + self.end(style).size_hint()
75 }
76 }
77 }
78}
79
/// The three literal parts of a list pattern: the text before the first placeholder,
/// the text between the two placeholders, and the text after the second placeholder.
type PatternParts<'a> = (&'a str, &'a str, &'a str);
81
82impl<'a> ConditionalListJoinerPattern<'a> {
83 pub(crate) fn parts<'b, W: Writeable + ?Sized>(
84 &'a self,
85 following_value: &'b W,
86 ) -> PatternParts<'a> {
87 match &self.special_case {
88 Some(SpecialCasePattern { condition: &SerdeDFA<'_>, pattern: &ListJoinerPattern<'_> })
89 if condition.deref().matches_earliest_fwd_lazy(haystack:following_value) =>
90 {
91 pattern.borrow_tuple()
92 }
93 _ => self.default.borrow_tuple(),
94 }
95 }
96
97 /// The expected length of this pattern
98 fn size_hint(&'a self) -> LengthHint {
99 let mut hint: LengthHint = self.default.size_hint();
100 if let Some(special_case: &SpecialCasePattern<'a>) = &self.special_case {
101 hint |= special_case.pattern.size_hint()
102 }
103 hint
104 }
105}
106
107impl<'data> ListJoinerPattern<'data> {
108 #[cfg(feature = "datagen")]
109 fn from_str(pattern: &str, allow_prefix: bool, allow_suffix: bool) -> Result<Self, DataError> {
110 match (pattern.find("{0}"), pattern.find("{1}")) {
111 (Some(index_0), Some(index_1))
112 if index_0 < index_1
113 && (allow_prefix || index_0 == 0)
114 && (allow_suffix || index_1 == pattern.len() - 3) =>
115 {
116 if (index_0 > 0 && !cfg!(test)) || index_1 - 3 >= 256 {
117 return Err(DataError::custom(
118 "Found valid pattern that cannot be stored in ListFormatterPatternsV1",
119 )
120 .with_debug_context(pattern));
121 }
122 #[allow(clippy::indexing_slicing)] // find
123 Ok(ListJoinerPattern {
124 string: Cow::Owned(alloc::format!(
125 "{}{}{}",
126 &pattern[0..index_0],
127 &pattern[index_0 + 3..index_1],
128 &pattern[index_1 + 3..]
129 )),
130 index_0: index_0 as u8,
131 index_1: (index_1 - 3) as u8,
132 })
133 }
134 _ => Err(DataError::custom("Invalid list pattern").with_debug_context(pattern)),
135 }
136 }
137
138 fn borrow_tuple(&'data self) -> PatternParts<'data> {
139 #![allow(clippy::indexing_slicing)] // by invariant
140 let index_0 = self.index_0 as usize;
141 let index_1 = self.index_1 as usize;
142 (
143 &self.string[0..index_0],
144 &self.string[index_0..index_1],
145 &self.string[index_1..],
146 )
147 }
148
149 fn size_hint(&self) -> LengthHint {
150 LengthHint::exact(self.string.len())
151 }
152}
153
#[cfg(feature = "datagen")]
impl<'data> From<ListJoinerPattern<'data>> for ConditionalListJoinerPattern<'data> {
    /// Wraps `default` as a conditional pattern that has no special case.
    fn from(default: ListJoinerPattern<'data>) -> Self {
        ConditionalListJoinerPattern {
            special_case: None,
            default,
        }
    }
}
163
#[cfg(all(test, feature = "datagen"))]
pub mod test {
    use super::*;

    /// Fixture shared by the tests: distinct patterns for every length, with the
    /// narrow `"{0}. {1}"` patterns made conditional on the regex `A`.
    pub fn test_patterns() -> ListFormatterPatternsV1<'static> {
        let mut patterns = ListFormatterPatternsV1::try_new([
            // Wide: general
            "@{0}:{1}",
            "{0},{1}",
            "{0}.{1}!",
            "${0};{1}+",
            // Short: different pattern lengths
            "{0}1{1}",
            "{0}12{1}",
            "{0}12{1}34",
            "{0}123{1}456",
            // Narrow: conditionals
            "{0}: {1}",
            "{0}, {1}",
            "{0}. {1}",
            "{0}. {1}",
        ])
        .unwrap();
        let condition = SerdeDFA::new(Cow::Borrowed("A")).unwrap();
        patterns
            .make_conditional("{0}. {1}", &condition, "{0} :o {1}")
            .unwrap();
        patterns
    }

    #[test]
    fn rejects_bad_patterns() {
        // (pattern, allow_prefix, allow_suffix, expected to parse)
        let cases: [(&str, bool, bool, bool); 10] = [
            ("{0} and", true, true, false),
            ("and {1}", true, true, false),
            ("{1} and {0}", true, true, false),
            ("{1{0}}", true, true, false),
            ("{0\u{202e}} and {1}", true, true, false),
            ("{{0}} {{1}}", true, true, true),
            ("{0} and {1} ", true, true, true),
            ("{0} and {1} ", true, false, false),
            (" {0} and {1}", true, true, true),
            (" {0} and {1}", false, true, false),
        ];
        for &(pattern, allow_prefix, allow_suffix, valid) in cases.iter() {
            let parsed = ListJoinerPattern::from_str(pattern, allow_prefix, allow_suffix);
            assert_eq!(parsed.is_ok(), valid, "pattern: {:?}", pattern);
        }
    }

    #[test]
    fn produces_correct_parts() {
        let patterns = test_patterns();
        let wide_pair = patterns.pair(ListLength::Wide);
        assert_eq!(wide_pair.parts(""), ("$", ";", "+"));
    }

    #[test]
    fn produces_correct_parts_conditionally() {
        let patterns = test_patterns();
        let narrow_end = patterns.end(ListLength::Narrow);

        // The condition is case-insensitive and only has to match a prefix.
        for following in ["A", "a", "ab"].iter() {
            assert_eq!(narrow_end.parts(*following), ("", " :o ", ""));
        }

        // The condition is anchored to the beginning of the following value.
        for following in ["B", "BA"].iter() {
            assert_eq!(narrow_end.parts(*following), ("", ". ", ""));
        }
    }

    #[test]
    fn size_hint_works() {
        let patterns = test_patterns();

        // Lists with fewer than two elements contain no literals.
        for len in 0..2 {
            assert_eq!(
                patterns.size_hint(ListLength::Short, len),
                LengthHint::exact(0)
            );
        }

        // pair pattern "{0}123{1}456"
        let pair_hint = patterns.size_hint(ListLength::Short, 2);
        assert_eq!(pair_hint, LengthHint::exact(6));

        // patterns "{0}1{1}", "{0}12{1}" (x197), and "{0}12{1}34"
        let short_hint = patterns.size_hint(ListLength::Short, 200);
        assert_eq!(short_hint, LengthHint::exact(1 + 2 * 197 + 4));

        // patterns "{0}: {1}", "{0}, {1}" (x197), and "{0} :o {1}" or "{0}. {1}"
        let narrow_hint = patterns.size_hint(ListLength::Narrow, 200);
        assert_eq!(
            narrow_hint,
            LengthHint::exact(2 + 197 * 2) + LengthHint::between(2, 4)
        );
    }
}
276