1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use alloc::vec; |
6 | use alloc::vec::Vec; |
7 | use core::{char, cmp::Ordering, ops::RangeBounds}; |
8 | |
9 | use crate::codepointinvlist::{utils::deconstruct_range, CodePointInversionList}; |
10 | use zerovec::{ule::AsULE, ZeroVec}; |
11 | |
12 | /// A builder for [`CodePointInversionList`]. |
13 | /// |
14 | /// Provides exposure to builder functions and conversion to [`CodePointInversionList`] |
15 | #[derive (Default)] |
16 | pub struct CodePointInversionListBuilder { |
17 | // A sorted list of even length, with values <= char::MAX + 1 |
18 | intervals: Vec<u32>, |
19 | } |
20 | |
21 | impl CodePointInversionListBuilder { |
22 | /// Returns empty [`CodePointInversionListBuilder`] |
23 | pub const fn new() -> Self { |
24 | Self { intervals: vec![] } |
25 | } |
26 | |
27 | /// Returns a [`CodePointInversionList`] and consumes the [`CodePointInversionListBuilder`] |
28 | pub fn build(self) -> CodePointInversionList<'static> { |
29 | let inv_list: ZeroVec<u32> = ZeroVec::alloc_from_slice(&self.intervals); |
30 | #[allow (clippy::unwrap_used)] // by invariant |
31 | CodePointInversionList::try_from_inversion_list(inv_list).unwrap() |
32 | } |
33 | |
34 | /// Abstraction for adding/removing a range from start..end |
35 | /// |
36 | /// If add is true add, else remove |
37 | fn add_remove_middle(&mut self, start: u32, end: u32, add: bool) { |
38 | if start >= end || end > char::MAX as u32 + 1 { |
39 | return; |
40 | } |
41 | let start_res = self.intervals.binary_search(&start); |
42 | let end_res = self.intervals.binary_search(&end); |
43 | let mut start_ind = start_res.unwrap_or_else(|x| x); |
44 | let mut end_ind = end_res.unwrap_or_else(|x| x); |
45 | let start_pos_check = (start_ind % 2 == 0) == add; |
46 | let end_pos_check = (end_ind % 2 == 0) == add; |
47 | let start_eq_end = start_ind == end_ind; |
48 | |
49 | #[allow (clippy::indexing_slicing)] // all indices are binary search results |
50 | if start_eq_end && start_pos_check && end_res.is_err() { |
51 | let ins = &[start, end]; |
52 | self.intervals |
53 | .splice(start_ind..end_ind, ins.iter().copied()); |
54 | } else { |
55 | if start_pos_check { |
56 | self.intervals[start_ind] = start; |
57 | start_ind += 1; |
58 | } |
59 | if end_pos_check { |
60 | if end_res.is_ok() { |
61 | end_ind += 1; |
62 | } else { |
63 | end_ind -= 1; |
64 | self.intervals[end_ind] = end; |
65 | } |
66 | } |
67 | if start_ind < end_ind { |
68 | self.intervals.drain(start_ind..end_ind); |
69 | } |
70 | } |
71 | } |
72 | |
73 | /// Add the range to the [`CodePointInversionListBuilder`] |
74 | /// |
75 | /// Accomplishes this through binary search for the start and end indices and merges intervals |
76 | /// in between with inplace memory. Performs `O(1)` operation if adding to end of list, and `O(N)` otherwise, |
77 | /// where `N` is the number of endpoints. |
78 | fn add(&mut self, start: u32, end: u32) { |
79 | if start >= end { |
80 | return; |
81 | } |
82 | if self.intervals.is_empty() { |
83 | self.intervals.extend_from_slice(&[start, end]); |
84 | return; |
85 | } |
86 | self.add_remove_middle(start, end, true); |
87 | } |
88 | |
89 | /// Add the character to the [`CodePointInversionListBuilder`] |
90 | /// |
91 | /// # Examples |
92 | /// |
93 | /// ``` |
94 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
95 | /// let mut builder = CodePointInversionListBuilder::new(); |
96 | /// builder.add_char('a' ); |
97 | /// let check = builder.build(); |
98 | /// assert_eq!(check.iter_chars().next(), Some('a' )); |
99 | /// ``` |
100 | pub fn add_char(&mut self, c: char) { |
101 | let to_add = c as u32; |
102 | self.add(to_add, to_add + 1); |
103 | } |
104 | |
105 | /// Add the code point value to the [`CodePointInversionListBuilder`] |
106 | /// |
107 | /// Note: Even though [`u32`] and [`prim@char`] in Rust are non-negative 4-byte |
108 | /// values, there is an important difference. A [`u32`] can take values up to |
109 | /// a very large integer value, while a [`prim@char`] in Rust is defined to be in |
110 | /// the range from 0 to the maximum valid Unicode Scalar Value. |
111 | /// |
112 | /// # Examples |
113 | /// |
114 | /// ``` |
115 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
116 | /// let mut builder = CodePointInversionListBuilder::new(); |
117 | /// builder.add32(0x41); |
118 | /// let check = builder.build(); |
119 | /// assert!(check.contains32(0x41)); |
120 | /// ``` |
121 | pub fn add32(&mut self, c: u32) { |
122 | if c <= char::MAX as u32 { |
123 | // we already know 0 <= c because c: u32 |
124 | self.add(c, c + 1); |
125 | } |
126 | } |
127 | |
128 | /// Same as [`Self::add32`]. |
129 | #[deprecated (since = "1.5.0" , note = "Use `add32`" )] |
130 | pub fn add_u32(&mut self, c: u32) { |
131 | self.add32(c) |
132 | } |
133 | |
134 | /// Add the range of characters to the [`CodePointInversionListBuilder`] |
135 | /// |
136 | /// # Examples |
137 | /// |
138 | /// ``` |
139 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
140 | /// let mut builder = CodePointInversionListBuilder::new(); |
141 | /// builder.add_range(&('A' ..='Z' )); |
142 | /// let check = builder.build(); |
143 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
144 | /// ``` |
145 | pub fn add_range(&mut self, range: &impl RangeBounds<char>) { |
146 | let (start, end) = deconstruct_range(range); |
147 | self.add(start, end); |
148 | } |
149 | |
150 | /// Add the range of characters, represented as u32, to the [`CodePointInversionListBuilder`] |
151 | /// |
152 | /// # Examples |
153 | /// |
154 | /// ``` |
155 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
156 | /// let mut builder = CodePointInversionListBuilder::new(); |
157 | /// builder.add_range32(&(0xd800..=0xdfff)); |
158 | /// let check = builder.build(); |
159 | /// assert!(check.contains32(0xd900)); |
160 | /// ``` |
161 | pub fn add_range32(&mut self, range: &impl RangeBounds<u32>) { |
162 | let (start, end) = deconstruct_range(range); |
163 | // Sets that include char::MAX need to allow an end value of MAX + 1 |
164 | if start <= end && end <= char::MAX as u32 + 1 { |
165 | self.add(start, end); |
166 | } |
167 | } |
168 | |
169 | /// Same as [`Self::add_range32`]. |
170 | #[deprecated (since = "1.5.0" , note = "Use `add_range32`" )] |
171 | pub fn add_range_u32(&mut self, range: &impl RangeBounds<u32>) { |
172 | self.add_range32(range) |
173 | } |
174 | |
175 | /// Add the [`CodePointInversionList`] reference to the [`CodePointInversionListBuilder`] |
176 | /// |
177 | /// # Examples |
178 | /// |
179 | /// ``` |
180 | /// use icu::collections::codepointinvlist::{ |
181 | /// CodePointInversionList, CodePointInversionListBuilder, |
182 | /// }; |
183 | /// let mut builder = CodePointInversionListBuilder::new(); |
184 | /// let set = |
185 | /// CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x4C]) |
186 | /// .unwrap(); |
187 | /// builder.add_set(&set); |
188 | /// let check = builder.build(); |
189 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
190 | /// ``` |
191 | #[allow (unused_assignments)] |
192 | pub fn add_set(&mut self, set: &CodePointInversionList) { |
193 | #[allow (clippy::indexing_slicing)] // chunks |
194 | set.as_inversion_list() |
195 | .as_ule_slice() |
196 | .chunks(2) |
197 | .for_each(|pair| { |
198 | self.add( |
199 | AsULE::from_unaligned(pair[0]), |
200 | AsULE::from_unaligned(pair[1]), |
201 | ) |
202 | }); |
203 | } |
204 | |
205 | /// Removes the range from the [`CodePointInversionListBuilder`] |
206 | /// |
207 | /// Performs binary search to find start and end affected intervals, then removes in an `O(N)` fashion |
208 | /// where `N` is the number of endpoints, with in-place memory. |
209 | fn remove(&mut self, start: u32, end: u32) { |
210 | if start >= end || self.intervals.is_empty() { |
211 | return; |
212 | } |
213 | if let Some(&last) = self.intervals.last() { |
214 | #[allow (clippy::indexing_slicing)] |
215 | // by invariant, if we have a last we have a (different) first |
216 | if start <= self.intervals[0] && end >= last { |
217 | self.intervals.clear(); |
218 | } else { |
219 | self.add_remove_middle(start, end, false); |
220 | } |
221 | } |
222 | } |
223 | |
224 | /// Remove the character from the [`CodePointInversionListBuilder`] |
225 | /// |
226 | /// # Examples |
227 | /// |
228 | /// ``` |
229 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
230 | /// let mut builder = CodePointInversionListBuilder::new(); |
231 | /// builder.add_range(&('A' ..='Z' )); |
232 | /// builder.remove_char('A' ); |
233 | /// let check = builder.build(); |
234 | /// assert_eq!(check.iter_chars().next(), Some('B' )); |
235 | pub fn remove_char(&mut self, c: char) { |
236 | self.remove32(c as u32) |
237 | } |
238 | |
239 | /// See [`Self::remove_char`] |
240 | pub fn remove32(&mut self, c: u32) { |
241 | self.remove(c, c + 1); |
242 | } |
243 | |
244 | /// Remove the range of characters from the [`CodePointInversionListBuilder`] |
245 | /// |
246 | /// # Examples |
247 | /// |
248 | /// ``` |
249 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
250 | /// let mut builder = CodePointInversionListBuilder::new(); |
251 | /// builder.add_range(&('A' ..='Z' )); |
252 | /// builder.remove_range(&('A' ..='C' )); |
253 | /// let check = builder.build(); |
254 | /// assert_eq!(check.iter_chars().next(), Some('D' )); |
255 | pub fn remove_range(&mut self, range: &impl RangeBounds<char>) { |
256 | let (start, end) = deconstruct_range(range); |
257 | self.remove(start, end); |
258 | } |
259 | |
260 | /// See [`Self::remove_range`] |
261 | pub fn remove_range32(&mut self, range: &impl RangeBounds<u32>) { |
262 | let (start, end) = deconstruct_range(range); |
263 | self.remove(start, end); |
264 | } |
265 | |
266 | /// Remove the [`CodePointInversionList`] from the [`CodePointInversionListBuilder`] |
267 | /// |
268 | /// # Examples |
269 | /// |
270 | /// ``` |
271 | /// use icu::collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder}; |
272 | /// let mut builder = CodePointInversionListBuilder::new(); |
273 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x46]).unwrap(); |
274 | /// builder.add_range(&('A' ..='Z' )); |
275 | /// builder.remove_set(&set); // removes 'A'..='E' |
276 | /// let check = builder.build(); |
277 | /// assert_eq!(check.iter_chars().next(), Some('F' )); |
278 | #[allow (clippy::indexing_slicing)] // chunks |
279 | pub fn remove_set(&mut self, set: &CodePointInversionList) { |
280 | set.as_inversion_list() |
281 | .as_ule_slice() |
282 | .chunks(2) |
283 | .for_each(|pair| { |
284 | self.remove( |
285 | AsULE::from_unaligned(pair[0]), |
286 | AsULE::from_unaligned(pair[1]), |
287 | ) |
288 | }); |
289 | } |
290 | |
291 | /// Retain the specified character in the [`CodePointInversionListBuilder`] if it exists |
292 | /// |
293 | /// # Examples |
294 | /// |
295 | /// ``` |
296 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
297 | /// let mut builder = CodePointInversionListBuilder::new(); |
298 | /// builder.add_range(&('A' ..='Z' )); |
299 | /// builder.retain_char('A' ); |
300 | /// let set = builder.build(); |
301 | /// let mut check = set.iter_chars(); |
302 | /// assert_eq!(check.next(), Some('A' )); |
303 | /// assert_eq!(check.next(), None); |
304 | /// ``` |
305 | pub fn retain_char(&mut self, c: char) { |
306 | self.retain32(c as u32) |
307 | } |
308 | |
309 | /// See [`Self::retain_char`] |
310 | pub fn retain32(&mut self, c: u32) { |
311 | self.remove(0, c); |
312 | self.remove(c + 1, (char::MAX as u32) + 1); |
313 | } |
314 | |
315 | /// Retain the range of characters located within the [`CodePointInversionListBuilder`] |
316 | /// |
317 | /// # Examples |
318 | /// |
319 | /// ``` |
320 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
321 | /// let mut builder = CodePointInversionListBuilder::new(); |
322 | /// builder.add_range(&('A' ..='Z' )); |
323 | /// builder.retain_range(&('A' ..='B' )); |
324 | /// let set = builder.build(); |
325 | /// let mut check = set.iter_chars(); |
326 | /// assert_eq!(check.next(), Some('A' )); |
327 | /// assert_eq!(check.next(), Some('B' )); |
328 | /// assert_eq!(check.next(), None); |
329 | /// ``` |
330 | pub fn retain_range(&mut self, range: &impl RangeBounds<char>) { |
331 | let (start, end) = deconstruct_range(range); |
332 | self.remove(0, start); |
333 | self.remove(end, (char::MAX as u32) + 1); |
334 | } |
335 | |
336 | /// See [`Self::retain_range`] |
337 | pub fn retain_range32(&mut self, range: &impl RangeBounds<u32>) { |
338 | let (start, end) = deconstruct_range(range); |
339 | self.remove(0, start); |
340 | self.remove(end, (char::MAX as u32) + 1); |
341 | } |
342 | |
343 | /// Retain the elements in the specified set within the [`CodePointInversionListBuilder`] |
344 | /// |
345 | /// # Examples |
346 | /// |
347 | /// ``` |
348 | /// use icu::collections::codepointinvlist::{ |
349 | /// CodePointInversionList, CodePointInversionListBuilder, |
350 | /// }; |
351 | /// let mut builder = CodePointInversionListBuilder::new(); |
352 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[65, 70]) |
353 | /// .unwrap(); |
354 | /// builder.add_range(&('A' ..='Z' )); |
355 | /// builder.retain_set(&set); // retains 'A'..='E' |
356 | /// let check = builder.build(); |
357 | /// assert!(check.contains('A' )); |
358 | /// assert!(!check.contains('G' )); |
359 | /// ``` |
360 | #[allow (clippy::indexing_slicing)] // chunks |
361 | pub fn retain_set(&mut self, set: &CodePointInversionList) { |
362 | let mut prev = 0; |
363 | for pair in set.as_inversion_list().as_ule_slice().chunks(2) { |
364 | let range_start = AsULE::from_unaligned(pair[0]); |
365 | let range_limit = AsULE::from_unaligned(pair[1]); |
366 | self.remove(prev, range_start); |
367 | prev = range_limit; |
368 | } |
369 | self.remove(prev, (char::MAX as u32) + 1); |
370 | } |
371 | |
372 | /// Computes the complement of the argument, adding any elements that do not yet exist in the builder, |
373 | /// and removing any elements that already exist in the builder. See public functions for examples. |
374 | /// |
375 | /// Performs in `O(B + S)`, where `B` is the number of endpoints in the Builder, and `S` is the number |
376 | /// of endpoints in the argument. |
377 | fn complement_list(&mut self, set_iter: impl core::iter::Iterator<Item = u32>) { |
378 | let mut res: Vec<u32> = vec![]; // not the biggest fan of having to allocate new memory |
379 | let mut ai = self.intervals.iter(); |
380 | let mut bi = set_iter; |
381 | let mut a = ai.next(); |
382 | let mut b = bi.next(); |
383 | while let (Some(c), Some(d)) = (a, b) { |
384 | match c.cmp(&d) { |
385 | Ordering::Less => { |
386 | res.push(*c); |
387 | a = ai.next(); |
388 | } |
389 | Ordering::Greater => { |
390 | res.push(d); |
391 | b = bi.next(); |
392 | } |
393 | Ordering::Equal => { |
394 | a = ai.next(); |
395 | b = bi.next(); |
396 | } |
397 | } |
398 | } |
399 | if let Some(c) = a { |
400 | res.push(*c) |
401 | } |
402 | if let Some(d) = b { |
403 | res.push(d) |
404 | } |
405 | res.extend(ai); |
406 | res.extend(bi); |
407 | self.intervals = res; |
408 | } |
409 | |
410 | /// Computes the complement of the builder, inverting the builder (any elements in the builder are removed, |
411 | /// while any elements not in the builder are added) |
412 | /// |
413 | /// # Examples |
414 | /// |
415 | /// ``` |
416 | /// use icu::collections::codepointinvlist::{ |
417 | /// CodePointInversionList, CodePointInversionListBuilder, |
418 | /// }; |
419 | /// let mut builder = CodePointInversionListBuilder::new(); |
420 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[ |
421 | /// 0x0, |
422 | /// 0x41, |
423 | /// 0x46, |
424 | /// (std::char::MAX as u32) + 1, |
425 | /// ]) |
426 | /// .unwrap(); |
427 | /// builder.add_set(&set); |
428 | /// builder.complement(); |
429 | /// let check = builder.build(); |
430 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
431 | /// ``` |
432 | pub fn complement(&mut self) { |
433 | if !self.intervals.is_empty() { |
434 | #[allow (clippy::indexing_slicing)] // by invariant |
435 | if self.intervals[0] == 0 { |
436 | self.intervals.drain(0..1); |
437 | } else { |
438 | self.intervals.insert(0, 0); |
439 | } |
440 | if self.intervals.last() == Some(&(char::MAX as u32 + 1)) { |
441 | self.intervals.pop(); |
442 | } else { |
443 | self.intervals.push(char::MAX as u32 + 1); |
444 | } |
445 | } else { |
446 | self.intervals |
447 | .extend_from_slice(&[0, (char::MAX as u32 + 1)]); |
448 | } |
449 | } |
450 | |
451 | /// Complements the character in the builder, adding it if not in the builder, and removing it otherwise. |
452 | /// |
453 | /// # Examples |
454 | /// |
455 | /// ``` |
456 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
457 | /// let mut builder = CodePointInversionListBuilder::new(); |
458 | /// builder.add_range(&('A' ..='D' )); |
459 | /// builder.complement_char('A' ); |
460 | /// builder.complement_char('E' ); |
461 | /// let check = builder.build(); |
462 | /// assert!(check.contains('E' )); |
463 | /// assert!(!check.contains('A' )); |
464 | /// ``` |
465 | pub fn complement_char(&mut self, c: char) { |
466 | self.complement32(c as u32); |
467 | } |
468 | |
469 | /// See [`Self::complement_char`] |
470 | pub fn complement32(&mut self, c: u32) { |
471 | self.complement_list([c, c + 1].into_iter()); |
472 | } |
473 | |
474 | /// Complements the range in the builder, adding any elements in the range if not in the builder, and |
475 | /// removing them otherwise. |
476 | /// |
477 | /// # Examples |
478 | /// |
479 | /// ``` |
480 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
481 | /// let mut builder = CodePointInversionListBuilder::new(); |
482 | /// builder.add_range(&('A' ..='D' )); |
483 | /// builder.complement_range(&('C' ..='F' )); |
484 | /// let check = builder.build(); |
485 | /// assert!(check.contains('F' )); |
486 | /// assert!(!check.contains('C' )); |
487 | /// ``` |
488 | pub fn complement_range(&mut self, range: &impl RangeBounds<char>) { |
489 | let (start, end) = deconstruct_range(range); |
490 | let to_complement = [start, end]; |
491 | self.complement_list(to_complement.iter().copied()); |
492 | } |
493 | |
494 | /// See [`Self::complement_range`] |
495 | pub fn complement_range32(&mut self, range: &impl RangeBounds<u32>) { |
496 | let (start, end) = deconstruct_range(range); |
497 | let to_complement = [start, end]; |
498 | self.complement_list(to_complement.iter().copied()); |
499 | } |
500 | |
501 | /// Complements the set in the builder, adding any elements in the set if not in the builder, and |
502 | /// removing them otherwise. |
503 | /// |
504 | /// # Examples |
505 | /// |
506 | /// ``` |
507 | /// use icu::collections::codepointinvlist::{ |
508 | /// CodePointInversionList, CodePointInversionListBuilder, |
509 | /// }; |
510 | /// let mut builder = CodePointInversionListBuilder::new(); |
511 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[ |
512 | /// 0x41, 0x46, 0x4B, 0x5A, |
513 | /// ]) |
514 | /// .unwrap(); |
515 | /// builder.add_range(&('C' ..='N' )); // 67 - 78 |
516 | /// builder.complement_set(&set); |
517 | /// let check = builder.build(); |
518 | /// assert!(check.contains('Q' )); // 81 |
519 | /// assert!(!check.contains('N' )); // 78 |
520 | /// ``` |
521 | pub fn complement_set(&mut self, set: &CodePointInversionList) { |
522 | let inv_list_iter_owned = set.as_inversion_list().iter(); |
523 | self.complement_list(inv_list_iter_owned); |
524 | } |
525 | |
526 | /// Returns whether the build is empty. |
527 | /// |
528 | /// # Examples |
529 | /// |
530 | /// ``` |
531 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
532 | /// let mut builder = CodePointInversionListBuilder::new(); |
533 | /// let check = builder.build(); |
534 | /// assert!(check.is_empty()); |
535 | /// ``` |
536 | pub fn is_empty(&self) -> bool { |
537 | self.intervals.is_empty() |
538 | } |
539 | } |
540 | |
541 | #[cfg (test)] |
542 | mod tests { |
543 | use super::{CodePointInversionList, CodePointInversionListBuilder}; |
544 | use core::char; |
545 | use zerovec::ZeroVec; |
546 | |
547 | fn generate_tester(ex: &[u32]) -> CodePointInversionListBuilder { |
548 | let inv_list = ZeroVec::<u32>::alloc_from_slice(ex); |
549 | let check = CodePointInversionList::try_from_inversion_list(inv_list).unwrap(); |
550 | let mut builder = CodePointInversionListBuilder::new(); |
551 | builder.add_set(&check); |
552 | builder |
553 | } |
554 | |
555 | #[test ] |
556 | fn test_new() { |
557 | let ex = CodePointInversionListBuilder::new(); |
558 | assert!(ex.intervals.is_empty()); |
559 | } |
560 | |
561 | #[test ] |
562 | fn test_build() { |
563 | let mut builder = CodePointInversionListBuilder::new(); |
564 | builder.add(0x41, 0x42); |
565 | let check: CodePointInversionList = builder.build(); |
566 | assert_eq!(check.iter_chars().next(), Some('A' )); |
567 | } |
568 | |
569 | #[test ] |
570 | fn test_empty_build() { |
571 | let builder = CodePointInversionListBuilder::new(); |
572 | let check: CodePointInversionList = builder.build(); |
573 | assert!(check.is_empty()); |
574 | } |
575 | |
576 | #[test ] |
577 | fn test_add_to_empty() { |
578 | let mut builder = CodePointInversionListBuilder::new(); |
579 | builder.add(0x0, 0xA); |
580 | assert_eq!(builder.intervals, [0x0, 0xA]); |
581 | } |
582 | |
583 | #[test ] |
584 | fn test_add_invalid() { |
585 | let mut builder = CodePointInversionListBuilder::new(); |
586 | builder.add(0x0, 0x0); |
587 | builder.add(0x5, 0x0); |
588 | assert!(builder.intervals.is_empty()); |
589 | } |
590 | |
591 | #[test ] |
592 | fn test_add_to_start() { |
593 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
594 | builder.add(0x0, 0x5); |
595 | let expected = [0x0, 0x5, 0xA, 0x14, 0x28, 0x32]; |
596 | assert_eq!(builder.intervals, expected); |
597 | } |
598 | |
599 | #[test ] |
600 | fn test_add_to_start_overlap() { |
601 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
602 | builder.add(0x0, 0xE); |
603 | let expected = [0x0, 0x14, 0x28, 0x32]; |
604 | assert_eq!(builder.intervals, expected); |
605 | } |
606 | |
607 | #[test ] |
608 | fn test_add_to_end() { |
609 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
610 | builder.add(0x3C, 0x46); |
611 | let expected = [0xA, 0x14, 0x28, 0x32, 60, 70]; |
612 | assert_eq!(builder.intervals, expected); |
613 | } |
614 | |
615 | #[test ] |
616 | fn test_add_to_end_overlap() { |
617 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
618 | builder.add(0x2B, 0x46); |
619 | let expected = [0xA, 0x14, 0x28, 0x46]; |
620 | assert_eq!(builder.intervals, expected); |
621 | } |
622 | |
623 | #[test ] |
624 | fn test_add_to_middle_no_overlap() { |
625 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
626 | builder.add(0x19, 0x1B); |
627 | let expected = [0xA, 0x14, 0x19, 0x1B, 0x28, 0x32]; |
628 | assert_eq!(builder.intervals, expected); |
629 | } |
630 | |
631 | #[test ] |
632 | fn test_add_to_middle_inside() { |
633 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
634 | builder.add(0xA, 0x14); |
635 | let expected = [0xA, 0x14, 0x28, 0x32]; |
636 | assert_eq!(builder.intervals, expected); |
637 | } |
638 | |
639 | #[test ] |
640 | fn test_add_to_middle_left_overlap() { |
641 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
642 | builder.add(0xF, 0x19); |
643 | let expected = [0xA, 0x19, 0x28, 0x32]; |
644 | assert_eq!(builder.intervals, expected); |
645 | } |
646 | |
647 | #[test ] |
648 | fn test_add_to_middle_right_overlap() { |
649 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
650 | builder.add(0x1E, 0x28); |
651 | let expected = [0xA, 0x14, 0x1E, 0x32]; |
652 | assert_eq!(builder.intervals, expected); |
653 | } |
654 | |
655 | #[test ] |
656 | fn test_add_to_full_encompass() { |
657 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
658 | builder.add(0x0, 0x3C); |
659 | let expected = [0x0, 0x3C]; |
660 | assert_eq!(builder.intervals, expected); |
661 | } |
662 | |
663 | #[test ] |
664 | fn test_add_to_partial_encompass() { |
665 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
666 | builder.add(0x0, 0x23); |
667 | let expected = [0x0, 0x23, 0x28, 0x32]; |
668 | assert_eq!(builder.intervals, expected); |
669 | } |
670 | |
671 | #[test ] |
672 | fn test_add_aligned_front() { |
673 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
674 | builder.add(5, 10); |
675 | let expected = [5, 0x14, 0x28, 0x32]; |
676 | assert_eq!(builder.intervals, expected); |
677 | } |
678 | |
679 | #[test ] |
680 | fn test_add_aligned_back() { |
681 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
682 | builder.add(0x32, 0x37); |
683 | let expected = [0xA, 0x14, 0x28, 0x37]; |
684 | assert_eq!(builder.intervals, expected); |
685 | } |
686 | |
687 | #[test ] |
688 | fn test_add_aligned_start_middle() { |
689 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
690 | builder.add(0x14, 0x19); |
691 | let expected = [0xA, 0x19, 0x28, 0x32]; |
692 | assert_eq!(builder.intervals, expected); |
693 | } |
694 | |
695 | #[test ] |
696 | fn test_add_aligned_end_middle() { |
697 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
698 | builder.add(0x23, 0x28); |
699 | let expected = [0xA, 0x14, 0x23, 0x32]; |
700 | assert_eq!(builder.intervals, expected); |
701 | } |
702 | |
703 | #[test ] |
704 | fn test_add_aligned_in_between_end() { |
705 | let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); |
706 | builder.add(0xF, 0x1E); |
707 | let expected = [0xA, 0x28, 0x32, 0x3C]; |
708 | assert_eq!(builder.intervals, expected); |
709 | } |
710 | |
711 | #[test ] |
712 | fn test_add_aligned_in_between_start() { |
713 | let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); |
714 | builder.add(20, 35); |
715 | let expected = [0xA, 0x28, 0x32, 0x3C]; |
716 | assert_eq!(builder.intervals, expected); |
717 | } |
718 | |
719 | #[test ] |
720 | fn test_add_adjacent_ranges() { |
721 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
722 | builder.add(0x13, 0x14); |
723 | builder.add(0x14, 0x15); |
724 | builder.add(0x15, 0x16); |
725 | let expected = [0xA, 0x16, 0x28, 0x32]; |
726 | assert_eq!(builder.intervals, expected); |
727 | } |
728 | |
729 | #[test ] |
730 | fn test_add_codepointinversionlist() { |
731 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
732 | let check = CodePointInversionList::try_from_inversion_list_slice(&[ |
733 | 0x5, 0xA, 0x16, 0x21, 0x2C, 0x33, |
734 | ]) |
735 | .unwrap(); |
736 | builder.add_set(&check); |
737 | let expected = [0x5, 0x14, 0x16, 0x21, 0x28, 0x33]; |
738 | assert_eq!(builder.intervals, expected); |
739 | } |
740 | #[test ] |
741 | fn test_add_char() { |
742 | let mut builder = CodePointInversionListBuilder::new(); |
743 | builder.add_char('a' ); |
744 | let expected = [0x61, 0x62]; |
745 | assert_eq!(builder.intervals, expected); |
746 | } |
747 | |
748 | #[test ] |
749 | fn test_add_range() { |
750 | let mut builder = CodePointInversionListBuilder::new(); |
751 | builder.add_range(&('A' ..='Z' )); |
752 | let expected = [0x41, 0x5B]; |
753 | assert_eq!(builder.intervals, expected); |
754 | } |
755 | |
756 | #[test ] |
757 | fn test_add_range32() { |
758 | let mut builder = CodePointInversionListBuilder::new(); |
759 | builder.add_range32(&(0xd800..=0xdfff)); |
760 | let expected = [0xd800, 0xe000]; |
761 | assert_eq!(builder.intervals, expected); |
762 | } |
763 | |
764 | #[test ] |
765 | fn test_add_invalid_range() { |
766 | let mut builder = CodePointInversionListBuilder::new(); |
767 | builder.add_range(&('Z' ..='A' )); |
768 | assert!(builder.intervals.is_empty()); |
769 | } |
770 | |
771 | #[test ] |
772 | fn test_remove_empty() { |
773 | let mut builder = CodePointInversionListBuilder::new(); |
774 | builder.remove(0x0, 0xA); |
775 | assert!(builder.intervals.is_empty()); |
776 | } |
777 | |
778 | #[test ] |
779 | fn test_remove_entire_builder() { |
780 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
781 | builder.remove(0xA, 0x32); |
782 | assert!(builder.intervals.is_empty()); |
783 | } |
784 | |
785 | #[test ] |
786 | fn test_remove_entire_range() { |
787 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
788 | builder.remove(0xA, 0x14); |
789 | let expected = [0x28, 0x32]; |
790 | assert_eq!(builder.intervals, expected); |
791 | } |
792 | |
793 | #[test ] |
794 | fn test_remove_partial_range_left() { |
795 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
796 | builder.remove(0xA, 0x2B); |
797 | let expected = [0x2B, 0x32]; |
798 | assert_eq!(builder.intervals, expected); |
799 | } |
800 | |
801 | #[test ] |
802 | fn test_remove_ne_range() { |
803 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
804 | builder.remove(0x14, 0x28); |
805 | let expected = [0xA, 0x14, 0x28, 0x32]; |
806 | assert_eq!(builder.intervals, expected); |
807 | } |
808 | |
809 | #[test ] |
810 | fn test_remove_partial_range_right() { |
811 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
812 | builder.remove(0xF, 0x37); |
813 | let expected = [0xA, 0xF]; |
814 | assert_eq!(builder.intervals, expected); |
815 | } |
816 | |
817 | #[test ] |
818 | fn test_remove_middle_range() { |
819 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
820 | builder.remove(0xC, 0x12); |
821 | let expected = [0xA, 0xC, 0x12, 0x14, 0x28, 0x32]; |
822 | assert_eq!(builder.intervals, expected); |
823 | } |
824 | |
825 | #[test ] |
826 | fn test_remove_ne_middle_range() { |
827 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
828 | builder.remove(0x19, 0x1B); |
829 | let expected = [0xA, 0x14, 0x28, 0x32]; |
830 | assert_eq!(builder.intervals, expected); |
831 | } |
832 | |
833 | #[test ] |
834 | fn test_remove_encompassed_range() { |
835 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
836 | builder.remove(0x19, 0x37); |
837 | let expected = [0xA, 0x14, 0x46, 0x50]; |
838 | assert_eq!(builder.intervals, expected); |
839 | } |
840 | #[test ] |
841 | fn test_remove_adjacent_ranges() { |
842 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
843 | builder.remove(0x27, 0x28); |
844 | builder.remove(0x28, 0x29); |
845 | builder.remove(0x29, 0x2A); |
846 | let expected = [0xA, 0x14, 0x2A, 0x32]; |
847 | assert_eq!(builder.intervals, expected); |
848 | } |
849 | |
850 | #[test ] |
851 | fn test_remove_char() { |
852 | let mut builder = generate_tester(&[0x41, 0x46]); |
853 | builder.remove_char('A' ); // 65 |
854 | let expected = [0x42, 0x46]; |
855 | assert_eq!(builder.intervals, expected); |
856 | } |
857 | |
858 | #[test ] |
859 | fn test_remove_range() { |
860 | let mut builder = generate_tester(&[0x41, 0x5A]); |
861 | builder.remove_range(&('A' ..'L' )); // 65 - 76 |
862 | let expected = [0x4C, 0x5A]; |
863 | assert_eq!(builder.intervals, expected); |
864 | } |
865 | |
866 | #[test ] |
867 | fn test_remove_set() { |
868 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
869 | let remove = |
870 | CodePointInversionList::try_from_inversion_list_slice(&[0xA, 0x14, 0x2D, 0x4B]) |
871 | .unwrap(); |
872 | builder.remove_set(&remove); |
873 | let expected = [0x28, 0x2D, 0x4B, 0x50]; |
874 | assert_eq!(builder.intervals, expected); |
875 | } |
876 | |
877 | #[test ] |
878 | fn test_retain_char() { |
879 | let mut builder = generate_tester(&[0x41, 0x5A]); |
880 | builder.retain_char('A' ); // 65 |
881 | let expected = [0x41, 0x42]; |
882 | assert_eq!(builder.intervals, expected); |
883 | } |
884 | |
885 | #[test ] |
886 | fn test_retain_range() { |
887 | let mut builder = generate_tester(&[0x41, 0x5A]); |
888 | builder.retain_range(&('C' ..'F' )); // 67 - 70 |
889 | let expected = [0x43, 0x46]; |
890 | assert_eq!(builder.intervals, expected); |
891 | } |
892 | |
893 | #[test ] |
894 | fn test_retain_range_empty() { |
895 | let mut builder = generate_tester(&[0x41, 0x46]); |
896 | builder.retain_range(&('F' ..'Z' )); |
897 | assert!(builder.intervals.is_empty()); |
898 | } |
899 | |
900 | #[test ] |
901 | fn test_retain_set() { |
902 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
903 | let retain = CodePointInversionList::try_from_inversion_list_slice(&[ |
904 | 0xE, 0x14, 0x19, 0x37, 0x4D, 0x51, |
905 | ]) |
906 | .unwrap(); |
907 | builder.retain_set(&retain); |
908 | let expected = [0xE, 0x14, 0x28, 0x32, 0x4D, 0x50]; |
909 | assert_eq!(builder.intervals, expected); |
910 | } |
911 | |
912 | #[test ] |
913 | fn test_complement() { |
914 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
915 | builder.complement(); |
916 | let expected = [0x0, 0xA, 0x14, 0x28, 0x32, (char::MAX as u32) + 1]; |
917 | assert_eq!(builder.intervals, expected); |
918 | } |
919 | |
920 | #[test ] |
921 | fn test_complement_empty() { |
922 | let mut builder = generate_tester(&[]); |
923 | builder.complement(); |
924 | let expected = [0x0, (char::MAX as u32) + 1]; |
925 | assert_eq!(builder.intervals, expected); |
926 | |
927 | builder.complement(); |
928 | let expected: [u32; 0] = []; |
929 | assert_eq!(builder.intervals, expected); |
930 | } |
931 | |
932 | #[test ] |
933 | fn test_complement_zero_max() { |
934 | let mut builder = generate_tester(&[0x0, 0xA, 0x5A, (char::MAX as u32) + 1]); |
935 | builder.complement(); |
936 | let expected = [0xA, 0x5A]; |
937 | assert_eq!(builder.intervals, expected); |
938 | } |
939 | |
940 | #[test ] |
941 | fn test_complement_interior() { |
942 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
943 | builder.complement_list([0xE, 0x14].iter().copied()); |
944 | let expected = [0xA, 0xE, 0x28, 0x32]; |
945 | assert_eq!(builder.intervals, expected); |
946 | } |
947 | |
948 | #[test ] |
949 | fn test_complement_exterior() { |
950 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
951 | builder.complement_list([0x19, 0x23].iter().copied()); |
952 | let expected = [0xA, 0x14, 0x19, 0x23, 0x28, 0x32]; |
953 | assert_eq!(builder.intervals, expected); |
954 | } |
955 | |
956 | #[test ] |
957 | fn test_complement_larger_list() { |
958 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
959 | builder.complement_list([0x1E, 0x37, 0x3C, 0x46].iter().copied()); |
960 | let expected = [0xA, 0x14, 0x1E, 0x28, 0x32, 0x37, 0x3C, 0x46]; |
961 | assert_eq!(builder.intervals, expected); |
962 | } |
963 | |
964 | #[test ] |
965 | fn test_complement_char() { |
966 | let mut builder = generate_tester(&[0x41, 0x4C]); // A - K |
967 | builder.complement_char('A' ); |
968 | builder.complement_char('L' ); |
969 | let expected = [0x42, 0x4D]; |
970 | assert_eq!(builder.intervals, expected); |
971 | } |
972 | |
973 | #[test ] |
974 | fn test_complement_range() { |
975 | let mut builder = generate_tester(&[0x46, 0x4C]); // F - K |
976 | builder.complement_range(&('A' ..='Z' )); |
977 | let expected = [0x41, 0x46, 0x4C, 0x5B]; |
978 | assert_eq!(builder.intervals, expected); |
979 | } |
980 | |
981 | #[test ] |
982 | fn test_complement_set() { |
983 | let mut builder = generate_tester(&[0x43, 0x4E]); |
984 | let set = CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x46, 0x4B, 0x5A]) |
985 | .unwrap(); |
986 | builder.complement_set(&set); |
987 | let expected = [0x41, 0x43, 0x46, 0x4B, 0x4E, 0x5A]; |
988 | assert_eq!(builder.intervals, expected); |
989 | } |
990 | |
991 | #[test ] |
992 | fn test_is_empty() { |
993 | let builder = CodePointInversionListBuilder::new(); |
994 | assert!(builder.is_empty()); |
995 | } |
996 | } |
997 | |