1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::lazy_automaton::LazyAutomaton;
6use crate::provider::*;
7use crate::ListLength;
8#[cfg(feature = "datagen")]
9use alloc::borrow::Cow;
10#[cfg(feature = "datagen")]
11use icu_provider::DataError;
12use writeable::{LengthHint, Writeable};
13
14impl<'data> ListFormatterPatternsV1<'data> {
15 /// Creates a new [`ListFormatterPatternsV1`] from the given patterns. Fails if any pattern is invalid.
16 ///
17 /// See [`ListJoinerPattern::from_str`]. `allow_prefix` will be true for `pair` and `end` patterns,
18 /// `allow_suffix` for `start` and `pair` patterns.
19 #[cfg(feature = "datagen")]
20 pub fn try_new(
21 [start, middle, end, pair, short_start, short_middle, short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair]: [&str; 12],
22 ) -> Result<Self, DataError> {
23 Ok(Self([
24 ListJoinerPattern::from_str(start, true, false)?.into(),
25 ListJoinerPattern::from_str(middle, false, false)?.into(),
26 ListJoinerPattern::from_str(end, false, true)?.into(),
27 ListJoinerPattern::from_str(pair, true, true)?.into(),
28 ListJoinerPattern::from_str(short_start, true, false)?.into(),
29 ListJoinerPattern::from_str(short_middle, false, false)?.into(),
30 ListJoinerPattern::from_str(short_end, false, true)?.into(),
31 ListJoinerPattern::from_str(short_pair, true, true)?.into(),
32 ListJoinerPattern::from_str(narrow_start, true, false)?.into(),
33 ListJoinerPattern::from_str(narrow_middle, false, false)?.into(),
34 ListJoinerPattern::from_str(narrow_end, false, true)?.into(),
35 ListJoinerPattern::from_str(narrow_pair, true, true)?.into(),
36 ]))
37 }
38
39 /// Adds a special case to all `pattern`s that will evaluate to
40 /// `alternative_pattern` when `regex` matches the following element.
41 /// The regex is interpreted case-insensitive and anchored to the beginning, but
42 /// to improve efficiency does not search for full matches. If a full match is
43 /// required, use `$`.
44 #[cfg(feature = "datagen")]
45 pub fn make_conditional(
46 &mut self,
47 pattern: &str,
48 regex: &SerdeDFA<'static>,
49 alternative_pattern: &str,
50 ) -> Result<(), DataError> {
51 let old = ListJoinerPattern::from_str(pattern, true, true)?;
52 for i in 0..12 {
53 #[allow(clippy::indexing_slicing)] // self.0 is &[_; 12]
54 if self.0[i].default == old {
55 self.0[i].special_case = Some(SpecialCasePattern {
56 condition: regex.clone(),
57 pattern: ListJoinerPattern::from_str(
58 alternative_pattern,
59 i % 4 == 0 || i % 4 == 3, // allow_prefix = start or pair
60 i % 4 == 2 || i % 4 == 3, // allow_suffix = end or pair
61 )?,
62 });
63 }
64 }
65 Ok(())
66 }
67
68 /// The range of the number of bytes required by the list literals to join a
69 /// list of length `len`. If none of the patterns are conditional, this is exact.
70 pub(crate) fn size_hint(&self, style: ListLength, len: usize) -> LengthHint {
71 match len {
72 0 | 1 => LengthHint::exact(0),
73 2 => self.pair(style).size_hint(),
74 n => {
75 self.start(style).size_hint()
76 + self.middle(style).size_hint() * (n - 3)
77 + self.end(style).size_hint()
78 }
79 }
80 }
81}
82
/// The three literal pieces of a joiner pattern, in order: the text before the
/// first placeholder, the text between the two placeholders, and the text after
/// the second placeholder.
type PatternParts<'a> = (&'a str, &'a str, &'a str);
84
85impl<'a> ConditionalListJoinerPattern<'a> {
86 pub(crate) fn parts<'b, W: Writeable + ?Sized>(
87 &'a self,
88 following_value: &'b W,
89 ) -> PatternParts<'a> {
90 match &self.special_case {
91 Some(SpecialCasePattern { condition: &SerdeDFA<'_>, pattern: &ListJoinerPattern<'_> })
92 if condition.deref().matches_earliest_fwd_lazy(haystack:following_value) =>
93 {
94 pattern.borrow_tuple()
95 }
96 _ => self.default.borrow_tuple(),
97 }
98 }
99
100 /// The expected length of this pattern
101 fn size_hint(&'a self) -> LengthHint {
102 let mut hint: LengthHint = self.default.size_hint();
103 if let Some(special_case: &SpecialCasePattern<'_>) = &self.special_case {
104 hint |= special_case.pattern.size_hint()
105 }
106 hint
107 }
108}
109
110impl<'data> ListJoinerPattern<'data> {
111 /// Construct the pattern from a CLDR pattern string
112 #[cfg(feature = "datagen")]
113 pub fn from_str(
114 pattern: &str,
115 allow_prefix: bool,
116 allow_suffix: bool,
117 ) -> Result<Self, DataError> {
118 match (pattern.find("{0}"), pattern.find("{1}")) {
119 (Some(index_0), Some(index_1))
120 if index_0 < index_1
121 && (allow_prefix || index_0 == 0)
122 && (allow_suffix || index_1 == pattern.len() - 3) =>
123 {
124 if (index_0 > 0 && !cfg!(test)) || index_1 - 3 >= 256 {
125 return Err(DataError::custom(
126 "Found valid pattern that cannot be stored in ListFormatterPatternsV1",
127 )
128 .with_debug_context(pattern));
129 }
130 #[allow(clippy::indexing_slicing)] // find
131 Ok(ListJoinerPattern {
132 string: Cow::Owned(alloc::format!(
133 "{}{}{}",
134 &pattern[0..index_0],
135 &pattern[index_0 + 3..index_1],
136 &pattern[index_1 + 3..]
137 )),
138 index_0: index_0 as u8,
139 index_1: (index_1 - 3) as u8,
140 })
141 }
142 _ => Err(DataError::custom("Invalid list pattern").with_debug_context(pattern)),
143 }
144 }
145
146 fn borrow_tuple(&'data self) -> PatternParts<'data> {
147 #![allow(clippy::indexing_slicing)] // by invariant
148 let index_0 = self.index_0 as usize;
149 let index_1 = self.index_1 as usize;
150 (
151 &self.string[0..index_0],
152 &self.string[index_0..index_1],
153 &self.string[index_1..],
154 )
155 }
156
157 fn size_hint(&self) -> LengthHint {
158 LengthHint::exact(self.string.len())
159 }
160}
161
#[cfg(feature = "datagen")]
impl<'data> From<ListJoinerPattern<'data>> for ConditionalListJoinerPattern<'data> {
    /// Wraps `pattern` as the default of a conditional pattern that has no
    /// special case attached.
    fn from(pattern: ListJoinerPattern<'data>) -> Self {
        ConditionalListJoinerPattern {
            special_case: None,
            default: pattern,
        }
    }
}
171
#[cfg(all(test, feature = "datagen"))]
pub mod test {
    use super::*;

    /// Builds the pattern set shared by the tests: distinctive wide patterns,
    /// short patterns of differing lengths, and narrow patterns where
    /// `"{0}. {1}"` becomes `"{0} :o {1}"` under the condition `"A"`.
    pub fn test_patterns() -> ListFormatterPatternsV1<'static> {
        let raw_patterns = [
            // Wide: general
            "@{0}:{1}",
            "{0},{1}",
            "{0}.{1}!",
            "${0};{1}+",
            // Short: different pattern lengths
            "{0}1{1}",
            "{0}12{1}",
            "{0}12{1}34",
            "{0}123{1}456",
            // Narrow: conditionals
            "{0}: {1}",
            "{0}, {1}",
            "{0}. {1}",
            "{0}. {1}",
        ];
        let mut fixture = ListFormatterPatternsV1::try_new(raw_patterns).unwrap();

        let condition = SerdeDFA::new(Cow::Borrowed("A")).unwrap();
        fixture
            .make_conditional("{0}. {1}", &condition, "{0} :o {1}")
            .unwrap();
        fixture
    }

    #[test]
    fn rejects_bad_patterns() {
        // (pattern, allow_prefix, allow_suffix, expected to parse)
        let cases: [(&str, bool, bool, bool); 10] = [
            ("{0} and", true, true, false),
            ("and {1}", true, true, false),
            ("{1} and {0}", true, true, false),
            ("{1{0}}", true, true, false),
            ("{0\u{202e}} and {1}", true, true, false),
            ("{{0}} {{1}}", true, true, true),
            ("{0} and {1} ", true, true, true),
            ("{0} and {1} ", true, false, false),
            (" {0} and {1}", true, true, true),
            (" {0} and {1}", false, true, false),
        ];

        for &(pattern, allow_prefix, allow_suffix, accepted) in cases.iter() {
            assert_eq!(
                ListJoinerPattern::from_str(pattern, allow_prefix, allow_suffix).is_ok(),
                accepted,
                "pattern {:?} with allow_prefix={} allow_suffix={}",
                pattern,
                allow_prefix,
                allow_suffix
            );
        }
    }

    #[test]
    fn produces_correct_parts() {
        let patterns = test_patterns();
        let wide_pair = patterns.pair(ListLength::Wide);
        assert_eq!(wide_pair.parts(""), ("$", ";", "+"));
    }

    #[test]
    fn produces_correct_parts_conditionally() {
        let patterns = test_patterns();
        let narrow_end = patterns.end(ListLength::Narrow);

        let special = ("", " :o ", "");
        let fallback = ("", ". ", "");
        let cases = [
            ("A", special),
            ("a", special),
            ("ab", special),
            ("B", fallback),
            ("BA", fallback),
        ];

        for &(following, expected) in cases.iter() {
            assert_eq!(
                narrow_end.parts(following),
                expected,
                "following value {:?}",
                following
            );
        }
    }

    #[test]
    fn size_hint_works() {
        let patterns = test_patterns();
        let short = |len: usize| patterns.size_hint(ListLength::Short, len);

        // Lists of zero or one element need no joining literals.
        assert_eq!(short(0), LengthHint::exact(0));
        assert_eq!(short(1), LengthHint::exact(0));

        // pair pattern "{0}123{1}456"
        assert_eq!(short(2), LengthHint::exact(6));

        // patterns "{0}1{1}", "{0}12{1}" (x197), and "{0}12{1}34"
        assert_eq!(short(200), LengthHint::exact(1 + 2 * 197 + 4));

        // patterns "{0}: {1}", "{0}, {1}" (x197), and "{0} :o {1}" or "{0}. {1}"
        assert_eq!(
            patterns.size_hint(ListLength::Narrow, 200),
            LengthHint::exact(2 + 197 * 2) + LengthHint::between(2, 4)
        );
    }
}
284