| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use alloc::vec; |
| 6 | use alloc::vec::Vec; |
| 7 | use core::{char, cmp::Ordering, ops::RangeBounds}; |
| 8 | |
| 9 | use crate::codepointinvlist::{utils::deconstruct_range, CodePointInversionList}; |
| 10 | use zerovec::{ule::AsULE, ZeroVec}; |
| 11 | |
| 12 | /// A builder for [`CodePointInversionList`]. |
| 13 | /// |
| 14 | /// Provides exposure to builder functions and conversion to [`CodePointInversionList`] |
| 15 | #[derive (Default)] |
| 16 | pub struct CodePointInversionListBuilder { |
| 17 | // A sorted list of even length, with values <= char::MAX + 1 |
| 18 | intervals: Vec<u32>, |
| 19 | } |
| 20 | |
| 21 | impl CodePointInversionListBuilder { |
| 22 | /// Returns empty [`CodePointInversionListBuilder`] |
| 23 | pub const fn new() -> Self { |
| 24 | Self { intervals: vec![] } |
| 25 | } |
| 26 | |
| 27 | /// Returns a [`CodePointInversionList`] and consumes the [`CodePointInversionListBuilder`] |
| 28 | pub fn build(self) -> CodePointInversionList<'static> { |
| 29 | let inv_list: ZeroVec<u32> = ZeroVec::alloc_from_slice(&self.intervals); |
| 30 | #[allow (clippy::unwrap_used)] // by invariant |
| 31 | CodePointInversionList::try_from_inversion_list(inv_list).unwrap() |
| 32 | } |
| 33 | |
| 34 | /// Abstraction for adding/removing a range from start..end |
| 35 | /// |
| 36 | /// If add is true add, else remove |
| 37 | fn add_remove_middle(&mut self, start: u32, end: u32, add: bool) { |
| 38 | if start >= end || end > char::MAX as u32 + 1 { |
| 39 | return; |
| 40 | } |
| 41 | let start_res = self.intervals.binary_search(&start); |
| 42 | let end_res = self.intervals.binary_search(&end); |
| 43 | let mut start_ind = start_res.unwrap_or_else(|x| x); |
| 44 | let mut end_ind = end_res.unwrap_or_else(|x| x); |
| 45 | let start_pos_check = (start_ind % 2 == 0) == add; |
| 46 | let end_pos_check = (end_ind % 2 == 0) == add; |
| 47 | let start_eq_end = start_ind == end_ind; |
| 48 | |
| 49 | #[allow (clippy::indexing_slicing)] // all indices are binary search results |
| 50 | if start_eq_end && start_pos_check && end_res.is_err() { |
| 51 | let ins = &[start, end]; |
| 52 | self.intervals |
| 53 | .splice(start_ind..end_ind, ins.iter().copied()); |
| 54 | } else { |
| 55 | if start_pos_check { |
| 56 | self.intervals[start_ind] = start; |
| 57 | start_ind += 1; |
| 58 | } |
| 59 | if end_pos_check { |
| 60 | if end_res.is_ok() { |
| 61 | end_ind += 1; |
| 62 | } else { |
| 63 | end_ind -= 1; |
| 64 | self.intervals[end_ind] = end; |
| 65 | } |
| 66 | } |
| 67 | if start_ind < end_ind { |
| 68 | self.intervals.drain(start_ind..end_ind); |
| 69 | } |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | /// Add the range to the [`CodePointInversionListBuilder`] |
| 74 | /// |
| 75 | /// Accomplishes this through binary search for the start and end indices and merges intervals |
| 76 | /// in between with inplace memory. Performs `O(1)` operation if adding to end of list, and `O(N)` otherwise, |
| 77 | /// where `N` is the number of endpoints. |
| 78 | fn add(&mut self, start: u32, end: u32) { |
| 79 | if start >= end { |
| 80 | return; |
| 81 | } |
| 82 | if self.intervals.is_empty() { |
| 83 | self.intervals.extend_from_slice(&[start, end]); |
| 84 | return; |
| 85 | } |
| 86 | self.add_remove_middle(start, end, true); |
| 87 | } |
| 88 | |
| 89 | /// Add the character to the [`CodePointInversionListBuilder`] |
| 90 | /// |
| 91 | /// # Examples |
| 92 | /// |
| 93 | /// ``` |
| 94 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 95 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 96 | /// builder.add_char('a' ); |
| 97 | /// let check = builder.build(); |
| 98 | /// assert_eq!(check.iter_chars().next(), Some('a' )); |
| 99 | /// ``` |
| 100 | pub fn add_char(&mut self, c: char) { |
| 101 | let to_add = c as u32; |
| 102 | self.add(to_add, to_add + 1); |
| 103 | } |
| 104 | |
| 105 | /// Add the code point value to the [`CodePointInversionListBuilder`] |
| 106 | /// |
| 107 | /// Note: Even though [`u32`] and [`prim@char`] in Rust are non-negative 4-byte |
| 108 | /// values, there is an important difference. A [`u32`] can take values up to |
| 109 | /// a very large integer value, while a [`prim@char`] in Rust is defined to be in |
| 110 | /// the range from 0 to the maximum valid Unicode Scalar Value. |
| 111 | /// |
| 112 | /// # Examples |
| 113 | /// |
| 114 | /// ``` |
| 115 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 116 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 117 | /// builder.add32(0x41); |
| 118 | /// let check = builder.build(); |
| 119 | /// assert!(check.contains32(0x41)); |
| 120 | /// ``` |
| 121 | pub fn add32(&mut self, c: u32) { |
| 122 | if c <= char::MAX as u32 { |
| 123 | // we already know 0 <= c because c: u32 |
| 124 | self.add(c, c + 1); |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | /// Same as [`Self::add32`]. |
| 129 | #[deprecated (since = "1.5.0" , note = "Use `add32`" )] |
| 130 | pub fn add_u32(&mut self, c: u32) { |
| 131 | self.add32(c) |
| 132 | } |
| 133 | |
| 134 | /// Add the range of characters to the [`CodePointInversionListBuilder`] |
| 135 | /// |
| 136 | /// # Examples |
| 137 | /// |
| 138 | /// ``` |
| 139 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 140 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 141 | /// builder.add_range(&('A' ..='Z' )); |
| 142 | /// let check = builder.build(); |
| 143 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
| 144 | /// ``` |
| 145 | pub fn add_range(&mut self, range: &impl RangeBounds<char>) { |
| 146 | let (start, end) = deconstruct_range(range); |
| 147 | self.add(start, end); |
| 148 | } |
| 149 | |
| 150 | /// Add the range of characters, represented as u32, to the [`CodePointInversionListBuilder`] |
| 151 | /// |
| 152 | /// # Examples |
| 153 | /// |
| 154 | /// ``` |
| 155 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 156 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 157 | /// builder.add_range32(&(0xd800..=0xdfff)); |
| 158 | /// let check = builder.build(); |
| 159 | /// assert!(check.contains32(0xd900)); |
| 160 | /// ``` |
| 161 | pub fn add_range32(&mut self, range: &impl RangeBounds<u32>) { |
| 162 | let (start, end) = deconstruct_range(range); |
| 163 | // Sets that include char::MAX need to allow an end value of MAX + 1 |
| 164 | if start <= end && end <= char::MAX as u32 + 1 { |
| 165 | self.add(start, end); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | /// Same as [`Self::add_range32`]. |
| 170 | #[deprecated (since = "1.5.0" , note = "Use `add_range32`" )] |
| 171 | pub fn add_range_u32(&mut self, range: &impl RangeBounds<u32>) { |
| 172 | self.add_range32(range) |
| 173 | } |
| 174 | |
| 175 | /// Add the [`CodePointInversionList`] reference to the [`CodePointInversionListBuilder`] |
| 176 | /// |
| 177 | /// # Examples |
| 178 | /// |
| 179 | /// ``` |
| 180 | /// use icu::collections::codepointinvlist::{ |
| 181 | /// CodePointInversionList, CodePointInversionListBuilder, |
| 182 | /// }; |
| 183 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 184 | /// let set = |
| 185 | /// CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x4C]) |
| 186 | /// .unwrap(); |
| 187 | /// builder.add_set(&set); |
| 188 | /// let check = builder.build(); |
| 189 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
| 190 | /// ``` |
| 191 | #[allow (unused_assignments)] |
| 192 | pub fn add_set(&mut self, set: &CodePointInversionList) { |
| 193 | #[allow (clippy::indexing_slicing)] // chunks |
| 194 | set.as_inversion_list() |
| 195 | .as_ule_slice() |
| 196 | .chunks(2) |
| 197 | .for_each(|pair| { |
| 198 | self.add( |
| 199 | AsULE::from_unaligned(pair[0]), |
| 200 | AsULE::from_unaligned(pair[1]), |
| 201 | ) |
| 202 | }); |
| 203 | } |
| 204 | |
| 205 | /// Removes the range from the [`CodePointInversionListBuilder`] |
| 206 | /// |
| 207 | /// Performs binary search to find start and end affected intervals, then removes in an `O(N)` fashion |
| 208 | /// where `N` is the number of endpoints, with in-place memory. |
| 209 | fn remove(&mut self, start: u32, end: u32) { |
| 210 | if start >= end || self.intervals.is_empty() { |
| 211 | return; |
| 212 | } |
| 213 | if let Some(&last) = self.intervals.last() { |
| 214 | #[allow (clippy::indexing_slicing)] |
| 215 | // by invariant, if we have a last we have a (different) first |
| 216 | if start <= self.intervals[0] && end >= last { |
| 217 | self.intervals.clear(); |
| 218 | } else { |
| 219 | self.add_remove_middle(start, end, false); |
| 220 | } |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | /// Remove the character from the [`CodePointInversionListBuilder`] |
| 225 | /// |
| 226 | /// # Examples |
| 227 | /// |
| 228 | /// ``` |
| 229 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 230 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 231 | /// builder.add_range(&('A' ..='Z' )); |
| 232 | /// builder.remove_char('A' ); |
| 233 | /// let check = builder.build(); |
| 234 | /// assert_eq!(check.iter_chars().next(), Some('B' )); |
| 235 | pub fn remove_char(&mut self, c: char) { |
| 236 | self.remove32(c as u32) |
| 237 | } |
| 238 | |
| 239 | /// See [`Self::remove_char`] |
| 240 | pub fn remove32(&mut self, c: u32) { |
| 241 | self.remove(c, c + 1); |
| 242 | } |
| 243 | |
| 244 | /// Remove the range of characters from the [`CodePointInversionListBuilder`] |
| 245 | /// |
| 246 | /// # Examples |
| 247 | /// |
| 248 | /// ``` |
| 249 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 250 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 251 | /// builder.add_range(&('A' ..='Z' )); |
| 252 | /// builder.remove_range(&('A' ..='C' )); |
| 253 | /// let check = builder.build(); |
| 254 | /// assert_eq!(check.iter_chars().next(), Some('D' )); |
| 255 | pub fn remove_range(&mut self, range: &impl RangeBounds<char>) { |
| 256 | let (start, end) = deconstruct_range(range); |
| 257 | self.remove(start, end); |
| 258 | } |
| 259 | |
| 260 | /// See [`Self::remove_range`] |
| 261 | pub fn remove_range32(&mut self, range: &impl RangeBounds<u32>) { |
| 262 | let (start, end) = deconstruct_range(range); |
| 263 | self.remove(start, end); |
| 264 | } |
| 265 | |
| 266 | /// Remove the [`CodePointInversionList`] from the [`CodePointInversionListBuilder`] |
| 267 | /// |
| 268 | /// # Examples |
| 269 | /// |
| 270 | /// ``` |
| 271 | /// use icu::collections::codepointinvlist::{CodePointInversionList, CodePointInversionListBuilder}; |
| 272 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 273 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x46]).unwrap(); |
| 274 | /// builder.add_range(&('A' ..='Z' )); |
| 275 | /// builder.remove_set(&set); // removes 'A'..='E' |
| 276 | /// let check = builder.build(); |
| 277 | /// assert_eq!(check.iter_chars().next(), Some('F' )); |
| 278 | #[allow (clippy::indexing_slicing)] // chunks |
| 279 | pub fn remove_set(&mut self, set: &CodePointInversionList) { |
| 280 | set.as_inversion_list() |
| 281 | .as_ule_slice() |
| 282 | .chunks(2) |
| 283 | .for_each(|pair| { |
| 284 | self.remove( |
| 285 | AsULE::from_unaligned(pair[0]), |
| 286 | AsULE::from_unaligned(pair[1]), |
| 287 | ) |
| 288 | }); |
| 289 | } |
| 290 | |
| 291 | /// Retain the specified character in the [`CodePointInversionListBuilder`] if it exists |
| 292 | /// |
| 293 | /// # Examples |
| 294 | /// |
| 295 | /// ``` |
| 296 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 297 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 298 | /// builder.add_range(&('A' ..='Z' )); |
| 299 | /// builder.retain_char('A' ); |
| 300 | /// let set = builder.build(); |
| 301 | /// let mut check = set.iter_chars(); |
| 302 | /// assert_eq!(check.next(), Some('A' )); |
| 303 | /// assert_eq!(check.next(), None); |
| 304 | /// ``` |
| 305 | pub fn retain_char(&mut self, c: char) { |
| 306 | self.retain32(c as u32) |
| 307 | } |
| 308 | |
| 309 | /// See [`Self::retain_char`] |
| 310 | pub fn retain32(&mut self, c: u32) { |
| 311 | self.remove(0, c); |
| 312 | self.remove(c + 1, (char::MAX as u32) + 1); |
| 313 | } |
| 314 | |
| 315 | /// Retain the range of characters located within the [`CodePointInversionListBuilder`] |
| 316 | /// |
| 317 | /// # Examples |
| 318 | /// |
| 319 | /// ``` |
| 320 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 321 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 322 | /// builder.add_range(&('A' ..='Z' )); |
| 323 | /// builder.retain_range(&('A' ..='B' )); |
| 324 | /// let set = builder.build(); |
| 325 | /// let mut check = set.iter_chars(); |
| 326 | /// assert_eq!(check.next(), Some('A' )); |
| 327 | /// assert_eq!(check.next(), Some('B' )); |
| 328 | /// assert_eq!(check.next(), None); |
| 329 | /// ``` |
| 330 | pub fn retain_range(&mut self, range: &impl RangeBounds<char>) { |
| 331 | let (start, end) = deconstruct_range(range); |
| 332 | self.remove(0, start); |
| 333 | self.remove(end, (char::MAX as u32) + 1); |
| 334 | } |
| 335 | |
| 336 | /// See [`Self::retain_range`] |
| 337 | pub fn retain_range32(&mut self, range: &impl RangeBounds<u32>) { |
| 338 | let (start, end) = deconstruct_range(range); |
| 339 | self.remove(0, start); |
| 340 | self.remove(end, (char::MAX as u32) + 1); |
| 341 | } |
| 342 | |
| 343 | /// Retain the elements in the specified set within the [`CodePointInversionListBuilder`] |
| 344 | /// |
| 345 | /// # Examples |
| 346 | /// |
| 347 | /// ``` |
| 348 | /// use icu::collections::codepointinvlist::{ |
| 349 | /// CodePointInversionList, CodePointInversionListBuilder, |
| 350 | /// }; |
| 351 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 352 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[65, 70]) |
| 353 | /// .unwrap(); |
| 354 | /// builder.add_range(&('A' ..='Z' )); |
| 355 | /// builder.retain_set(&set); // retains 'A'..='E' |
| 356 | /// let check = builder.build(); |
| 357 | /// assert!(check.contains('A' )); |
| 358 | /// assert!(!check.contains('G' )); |
| 359 | /// ``` |
| 360 | #[allow (clippy::indexing_slicing)] // chunks |
| 361 | pub fn retain_set(&mut self, set: &CodePointInversionList) { |
| 362 | let mut prev = 0; |
| 363 | for pair in set.as_inversion_list().as_ule_slice().chunks(2) { |
| 364 | let range_start = AsULE::from_unaligned(pair[0]); |
| 365 | let range_limit = AsULE::from_unaligned(pair[1]); |
| 366 | self.remove(prev, range_start); |
| 367 | prev = range_limit; |
| 368 | } |
| 369 | self.remove(prev, (char::MAX as u32) + 1); |
| 370 | } |
| 371 | |
| 372 | /// Computes the complement of the argument, adding any elements that do not yet exist in the builder, |
| 373 | /// and removing any elements that already exist in the builder. See public functions for examples. |
| 374 | /// |
| 375 | /// Performs in `O(B + S)`, where `B` is the number of endpoints in the Builder, and `S` is the number |
| 376 | /// of endpoints in the argument. |
| 377 | fn complement_list(&mut self, set_iter: impl core::iter::Iterator<Item = u32>) { |
| 378 | let mut res: Vec<u32> = vec![]; // not the biggest fan of having to allocate new memory |
| 379 | let mut ai = self.intervals.iter(); |
| 380 | let mut bi = set_iter; |
| 381 | let mut a = ai.next(); |
| 382 | let mut b = bi.next(); |
| 383 | while let (Some(c), Some(d)) = (a, b) { |
| 384 | match c.cmp(&d) { |
| 385 | Ordering::Less => { |
| 386 | res.push(*c); |
| 387 | a = ai.next(); |
| 388 | } |
| 389 | Ordering::Greater => { |
| 390 | res.push(d); |
| 391 | b = bi.next(); |
| 392 | } |
| 393 | Ordering::Equal => { |
| 394 | a = ai.next(); |
| 395 | b = bi.next(); |
| 396 | } |
| 397 | } |
| 398 | } |
| 399 | if let Some(c) = a { |
| 400 | res.push(*c) |
| 401 | } |
| 402 | if let Some(d) = b { |
| 403 | res.push(d) |
| 404 | } |
| 405 | res.extend(ai); |
| 406 | res.extend(bi); |
| 407 | self.intervals = res; |
| 408 | } |
| 409 | |
| 410 | /// Computes the complement of the builder, inverting the builder (any elements in the builder are removed, |
| 411 | /// while any elements not in the builder are added) |
| 412 | /// |
| 413 | /// # Examples |
| 414 | /// |
| 415 | /// ``` |
| 416 | /// use icu::collections::codepointinvlist::{ |
| 417 | /// CodePointInversionList, CodePointInversionListBuilder, |
| 418 | /// }; |
| 419 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 420 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[ |
| 421 | /// 0x0, |
| 422 | /// 0x41, |
| 423 | /// 0x46, |
| 424 | /// (std::char::MAX as u32) + 1, |
| 425 | /// ]) |
| 426 | /// .unwrap(); |
| 427 | /// builder.add_set(&set); |
| 428 | /// builder.complement(); |
| 429 | /// let check = builder.build(); |
| 430 | /// assert_eq!(check.iter_chars().next(), Some('A' )); |
| 431 | /// ``` |
| 432 | pub fn complement(&mut self) { |
| 433 | if !self.intervals.is_empty() { |
| 434 | #[allow (clippy::indexing_slicing)] // by invariant |
| 435 | if self.intervals[0] == 0 { |
| 436 | self.intervals.drain(0..1); |
| 437 | } else { |
| 438 | self.intervals.insert(0, 0); |
| 439 | } |
| 440 | if self.intervals.last() == Some(&(char::MAX as u32 + 1)) { |
| 441 | self.intervals.pop(); |
| 442 | } else { |
| 443 | self.intervals.push(char::MAX as u32 + 1); |
| 444 | } |
| 445 | } else { |
| 446 | self.intervals |
| 447 | .extend_from_slice(&[0, (char::MAX as u32 + 1)]); |
| 448 | } |
| 449 | } |
| 450 | |
| 451 | /// Complements the character in the builder, adding it if not in the builder, and removing it otherwise. |
| 452 | /// |
| 453 | /// # Examples |
| 454 | /// |
| 455 | /// ``` |
| 456 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 457 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 458 | /// builder.add_range(&('A' ..='D' )); |
| 459 | /// builder.complement_char('A' ); |
| 460 | /// builder.complement_char('E' ); |
| 461 | /// let check = builder.build(); |
| 462 | /// assert!(check.contains('E' )); |
| 463 | /// assert!(!check.contains('A' )); |
| 464 | /// ``` |
| 465 | pub fn complement_char(&mut self, c: char) { |
| 466 | self.complement32(c as u32); |
| 467 | } |
| 468 | |
| 469 | /// See [`Self::complement_char`] |
| 470 | pub fn complement32(&mut self, c: u32) { |
| 471 | self.complement_list([c, c + 1].into_iter()); |
| 472 | } |
| 473 | |
| 474 | /// Complements the range in the builder, adding any elements in the range if not in the builder, and |
| 475 | /// removing them otherwise. |
| 476 | /// |
| 477 | /// # Examples |
| 478 | /// |
| 479 | /// ``` |
| 480 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 481 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 482 | /// builder.add_range(&('A' ..='D' )); |
| 483 | /// builder.complement_range(&('C' ..='F' )); |
| 484 | /// let check = builder.build(); |
| 485 | /// assert!(check.contains('F' )); |
| 486 | /// assert!(!check.contains('C' )); |
| 487 | /// ``` |
| 488 | pub fn complement_range(&mut self, range: &impl RangeBounds<char>) { |
| 489 | let (start, end) = deconstruct_range(range); |
| 490 | let to_complement = [start, end]; |
| 491 | self.complement_list(to_complement.iter().copied()); |
| 492 | } |
| 493 | |
| 494 | /// See [`Self::complement_range`] |
| 495 | pub fn complement_range32(&mut self, range: &impl RangeBounds<u32>) { |
| 496 | let (start, end) = deconstruct_range(range); |
| 497 | let to_complement = [start, end]; |
| 498 | self.complement_list(to_complement.iter().copied()); |
| 499 | } |
| 500 | |
| 501 | /// Complements the set in the builder, adding any elements in the set if not in the builder, and |
| 502 | /// removing them otherwise. |
| 503 | /// |
| 504 | /// # Examples |
| 505 | /// |
| 506 | /// ``` |
| 507 | /// use icu::collections::codepointinvlist::{ |
| 508 | /// CodePointInversionList, CodePointInversionListBuilder, |
| 509 | /// }; |
| 510 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 511 | /// let set = CodePointInversionList::try_from_inversion_list_slice(&[ |
| 512 | /// 0x41, 0x46, 0x4B, 0x5A, |
| 513 | /// ]) |
| 514 | /// .unwrap(); |
| 515 | /// builder.add_range(&('C' ..='N' )); // 67 - 78 |
| 516 | /// builder.complement_set(&set); |
| 517 | /// let check = builder.build(); |
| 518 | /// assert!(check.contains('Q' )); // 81 |
| 519 | /// assert!(!check.contains('N' )); // 78 |
| 520 | /// ``` |
| 521 | pub fn complement_set(&mut self, set: &CodePointInversionList) { |
| 522 | let inv_list_iter_owned = set.as_inversion_list().iter(); |
| 523 | self.complement_list(inv_list_iter_owned); |
| 524 | } |
| 525 | |
| 526 | /// Returns whether the build is empty. |
| 527 | /// |
| 528 | /// # Examples |
| 529 | /// |
| 530 | /// ``` |
| 531 | /// use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 532 | /// let mut builder = CodePointInversionListBuilder::new(); |
| 533 | /// let check = builder.build(); |
| 534 | /// assert!(check.is_empty()); |
| 535 | /// ``` |
| 536 | pub fn is_empty(&self) -> bool { |
| 537 | self.intervals.is_empty() |
| 538 | } |
| 539 | } |
| 540 | |
| 541 | #[cfg (test)] |
| 542 | mod tests { |
| 543 | use super::{CodePointInversionList, CodePointInversionListBuilder}; |
| 544 | use core::char; |
| 545 | use zerovec::ZeroVec; |
| 546 | |
| 547 | fn generate_tester(ex: &[u32]) -> CodePointInversionListBuilder { |
| 548 | let inv_list = ZeroVec::<u32>::alloc_from_slice(ex); |
| 549 | let check = CodePointInversionList::try_from_inversion_list(inv_list).unwrap(); |
| 550 | let mut builder = CodePointInversionListBuilder::new(); |
| 551 | builder.add_set(&check); |
| 552 | builder |
| 553 | } |
| 554 | |
| 555 | #[test ] |
| 556 | fn test_new() { |
| 557 | let ex = CodePointInversionListBuilder::new(); |
| 558 | assert!(ex.intervals.is_empty()); |
| 559 | } |
| 560 | |
| 561 | #[test ] |
| 562 | fn test_build() { |
| 563 | let mut builder = CodePointInversionListBuilder::new(); |
| 564 | builder.add(0x41, 0x42); |
| 565 | let check: CodePointInversionList = builder.build(); |
| 566 | assert_eq!(check.iter_chars().next(), Some('A' )); |
| 567 | } |
| 568 | |
| 569 | #[test ] |
| 570 | fn test_empty_build() { |
| 571 | let builder = CodePointInversionListBuilder::new(); |
| 572 | let check: CodePointInversionList = builder.build(); |
| 573 | assert!(check.is_empty()); |
| 574 | } |
| 575 | |
| 576 | #[test ] |
| 577 | fn test_add_to_empty() { |
| 578 | let mut builder = CodePointInversionListBuilder::new(); |
| 579 | builder.add(0x0, 0xA); |
| 580 | assert_eq!(builder.intervals, [0x0, 0xA]); |
| 581 | } |
| 582 | |
| 583 | #[test ] |
| 584 | fn test_add_invalid() { |
| 585 | let mut builder = CodePointInversionListBuilder::new(); |
| 586 | builder.add(0x0, 0x0); |
| 587 | builder.add(0x5, 0x0); |
| 588 | assert!(builder.intervals.is_empty()); |
| 589 | } |
| 590 | |
| 591 | #[test ] |
| 592 | fn test_add_to_start() { |
| 593 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 594 | builder.add(0x0, 0x5); |
| 595 | let expected = [0x0, 0x5, 0xA, 0x14, 0x28, 0x32]; |
| 596 | assert_eq!(builder.intervals, expected); |
| 597 | } |
| 598 | |
| 599 | #[test ] |
| 600 | fn test_add_to_start_overlap() { |
| 601 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 602 | builder.add(0x0, 0xE); |
| 603 | let expected = [0x0, 0x14, 0x28, 0x32]; |
| 604 | assert_eq!(builder.intervals, expected); |
| 605 | } |
| 606 | |
| 607 | #[test ] |
| 608 | fn test_add_to_end() { |
| 609 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 610 | builder.add(0x3C, 0x46); |
| 611 | let expected = [0xA, 0x14, 0x28, 0x32, 60, 70]; |
| 612 | assert_eq!(builder.intervals, expected); |
| 613 | } |
| 614 | |
| 615 | #[test ] |
| 616 | fn test_add_to_end_overlap() { |
| 617 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 618 | builder.add(0x2B, 0x46); |
| 619 | let expected = [0xA, 0x14, 0x28, 0x46]; |
| 620 | assert_eq!(builder.intervals, expected); |
| 621 | } |
| 622 | |
| 623 | #[test ] |
| 624 | fn test_add_to_middle_no_overlap() { |
| 625 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 626 | builder.add(0x19, 0x1B); |
| 627 | let expected = [0xA, 0x14, 0x19, 0x1B, 0x28, 0x32]; |
| 628 | assert_eq!(builder.intervals, expected); |
| 629 | } |
| 630 | |
| 631 | #[test ] |
| 632 | fn test_add_to_middle_inside() { |
| 633 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 634 | builder.add(0xA, 0x14); |
| 635 | let expected = [0xA, 0x14, 0x28, 0x32]; |
| 636 | assert_eq!(builder.intervals, expected); |
| 637 | } |
| 638 | |
| 639 | #[test ] |
| 640 | fn test_add_to_middle_left_overlap() { |
| 641 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 642 | builder.add(0xF, 0x19); |
| 643 | let expected = [0xA, 0x19, 0x28, 0x32]; |
| 644 | assert_eq!(builder.intervals, expected); |
| 645 | } |
| 646 | |
| 647 | #[test ] |
| 648 | fn test_add_to_middle_right_overlap() { |
| 649 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 650 | builder.add(0x1E, 0x28); |
| 651 | let expected = [0xA, 0x14, 0x1E, 0x32]; |
| 652 | assert_eq!(builder.intervals, expected); |
| 653 | } |
| 654 | |
| 655 | #[test ] |
| 656 | fn test_add_to_full_encompass() { |
| 657 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 658 | builder.add(0x0, 0x3C); |
| 659 | let expected = [0x0, 0x3C]; |
| 660 | assert_eq!(builder.intervals, expected); |
| 661 | } |
| 662 | |
| 663 | #[test ] |
| 664 | fn test_add_to_partial_encompass() { |
| 665 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 666 | builder.add(0x0, 0x23); |
| 667 | let expected = [0x0, 0x23, 0x28, 0x32]; |
| 668 | assert_eq!(builder.intervals, expected); |
| 669 | } |
| 670 | |
| 671 | #[test ] |
| 672 | fn test_add_aligned_front() { |
| 673 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 674 | builder.add(5, 10); |
| 675 | let expected = [5, 0x14, 0x28, 0x32]; |
| 676 | assert_eq!(builder.intervals, expected); |
| 677 | } |
| 678 | |
| 679 | #[test ] |
| 680 | fn test_add_aligned_back() { |
| 681 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 682 | builder.add(0x32, 0x37); |
| 683 | let expected = [0xA, 0x14, 0x28, 0x37]; |
| 684 | assert_eq!(builder.intervals, expected); |
| 685 | } |
| 686 | |
| 687 | #[test ] |
| 688 | fn test_add_aligned_start_middle() { |
| 689 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 690 | builder.add(0x14, 0x19); |
| 691 | let expected = [0xA, 0x19, 0x28, 0x32]; |
| 692 | assert_eq!(builder.intervals, expected); |
| 693 | } |
| 694 | |
| 695 | #[test ] |
| 696 | fn test_add_aligned_end_middle() { |
| 697 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 698 | builder.add(0x23, 0x28); |
| 699 | let expected = [0xA, 0x14, 0x23, 0x32]; |
| 700 | assert_eq!(builder.intervals, expected); |
| 701 | } |
| 702 | |
| 703 | #[test ] |
| 704 | fn test_add_aligned_in_between_end() { |
| 705 | let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); |
| 706 | builder.add(0xF, 0x1E); |
| 707 | let expected = [0xA, 0x28, 0x32, 0x3C]; |
| 708 | assert_eq!(builder.intervals, expected); |
| 709 | } |
| 710 | |
| 711 | #[test ] |
| 712 | fn test_add_aligned_in_between_start() { |
| 713 | let mut builder = generate_tester(&[0xA, 0x14, 0x1E, 0x28, 0x32, 0x3C]); |
| 714 | builder.add(20, 35); |
| 715 | let expected = [0xA, 0x28, 0x32, 0x3C]; |
| 716 | assert_eq!(builder.intervals, expected); |
| 717 | } |
| 718 | |
| 719 | #[test ] |
| 720 | fn test_add_adjacent_ranges() { |
| 721 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 722 | builder.add(0x13, 0x14); |
| 723 | builder.add(0x14, 0x15); |
| 724 | builder.add(0x15, 0x16); |
| 725 | let expected = [0xA, 0x16, 0x28, 0x32]; |
| 726 | assert_eq!(builder.intervals, expected); |
| 727 | } |
| 728 | |
| 729 | #[test ] |
| 730 | fn test_add_codepointinversionlist() { |
| 731 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 732 | let check = CodePointInversionList::try_from_inversion_list_slice(&[ |
| 733 | 0x5, 0xA, 0x16, 0x21, 0x2C, 0x33, |
| 734 | ]) |
| 735 | .unwrap(); |
| 736 | builder.add_set(&check); |
| 737 | let expected = [0x5, 0x14, 0x16, 0x21, 0x28, 0x33]; |
| 738 | assert_eq!(builder.intervals, expected); |
| 739 | } |
| 740 | #[test ] |
| 741 | fn test_add_char() { |
| 742 | let mut builder = CodePointInversionListBuilder::new(); |
| 743 | builder.add_char('a' ); |
| 744 | let expected = [0x61, 0x62]; |
| 745 | assert_eq!(builder.intervals, expected); |
| 746 | } |
| 747 | |
| 748 | #[test ] |
| 749 | fn test_add_range() { |
| 750 | let mut builder = CodePointInversionListBuilder::new(); |
| 751 | builder.add_range(&('A' ..='Z' )); |
| 752 | let expected = [0x41, 0x5B]; |
| 753 | assert_eq!(builder.intervals, expected); |
| 754 | } |
| 755 | |
| 756 | #[test ] |
| 757 | fn test_add_range32() { |
| 758 | let mut builder = CodePointInversionListBuilder::new(); |
| 759 | builder.add_range32(&(0xd800..=0xdfff)); |
| 760 | let expected = [0xd800, 0xe000]; |
| 761 | assert_eq!(builder.intervals, expected); |
| 762 | } |
| 763 | |
| 764 | #[test ] |
| 765 | fn test_add_invalid_range() { |
| 766 | let mut builder = CodePointInversionListBuilder::new(); |
| 767 | builder.add_range(&('Z' ..='A' )); |
| 768 | assert!(builder.intervals.is_empty()); |
| 769 | } |
| 770 | |
| 771 | #[test ] |
| 772 | fn test_remove_empty() { |
| 773 | let mut builder = CodePointInversionListBuilder::new(); |
| 774 | builder.remove(0x0, 0xA); |
| 775 | assert!(builder.intervals.is_empty()); |
| 776 | } |
| 777 | |
| 778 | #[test ] |
| 779 | fn test_remove_entire_builder() { |
| 780 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 781 | builder.remove(0xA, 0x32); |
| 782 | assert!(builder.intervals.is_empty()); |
| 783 | } |
| 784 | |
| 785 | #[test ] |
| 786 | fn test_remove_entire_range() { |
| 787 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 788 | builder.remove(0xA, 0x14); |
| 789 | let expected = [0x28, 0x32]; |
| 790 | assert_eq!(builder.intervals, expected); |
| 791 | } |
| 792 | |
| 793 | #[test ] |
| 794 | fn test_remove_partial_range_left() { |
| 795 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 796 | builder.remove(0xA, 0x2B); |
| 797 | let expected = [0x2B, 0x32]; |
| 798 | assert_eq!(builder.intervals, expected); |
| 799 | } |
| 800 | |
| 801 | #[test ] |
| 802 | fn test_remove_ne_range() { |
| 803 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 804 | builder.remove(0x14, 0x28); |
| 805 | let expected = [0xA, 0x14, 0x28, 0x32]; |
| 806 | assert_eq!(builder.intervals, expected); |
| 807 | } |
| 808 | |
| 809 | #[test ] |
| 810 | fn test_remove_partial_range_right() { |
| 811 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 812 | builder.remove(0xF, 0x37); |
| 813 | let expected = [0xA, 0xF]; |
| 814 | assert_eq!(builder.intervals, expected); |
| 815 | } |
| 816 | |
| 817 | #[test ] |
| 818 | fn test_remove_middle_range() { |
| 819 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 820 | builder.remove(0xC, 0x12); |
| 821 | let expected = [0xA, 0xC, 0x12, 0x14, 0x28, 0x32]; |
| 822 | assert_eq!(builder.intervals, expected); |
| 823 | } |
| 824 | |
| 825 | #[test ] |
| 826 | fn test_remove_ne_middle_range() { |
| 827 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 828 | builder.remove(0x19, 0x1B); |
| 829 | let expected = [0xA, 0x14, 0x28, 0x32]; |
| 830 | assert_eq!(builder.intervals, expected); |
| 831 | } |
| 832 | |
| 833 | #[test ] |
| 834 | fn test_remove_encompassed_range() { |
| 835 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
| 836 | builder.remove(0x19, 0x37); |
| 837 | let expected = [0xA, 0x14, 0x46, 0x50]; |
| 838 | assert_eq!(builder.intervals, expected); |
| 839 | } |
| 840 | #[test ] |
| 841 | fn test_remove_adjacent_ranges() { |
| 842 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 843 | builder.remove(0x27, 0x28); |
| 844 | builder.remove(0x28, 0x29); |
| 845 | builder.remove(0x29, 0x2A); |
| 846 | let expected = [0xA, 0x14, 0x2A, 0x32]; |
| 847 | assert_eq!(builder.intervals, expected); |
| 848 | } |
| 849 | |
| 850 | #[test ] |
| 851 | fn test_remove_char() { |
| 852 | let mut builder = generate_tester(&[0x41, 0x46]); |
| 853 | builder.remove_char('A' ); // 65 |
| 854 | let expected = [0x42, 0x46]; |
| 855 | assert_eq!(builder.intervals, expected); |
| 856 | } |
| 857 | |
| 858 | #[test ] |
| 859 | fn test_remove_range() { |
| 860 | let mut builder = generate_tester(&[0x41, 0x5A]); |
| 861 | builder.remove_range(&('A' ..'L' )); // 65 - 76 |
| 862 | let expected = [0x4C, 0x5A]; |
| 863 | assert_eq!(builder.intervals, expected); |
| 864 | } |
| 865 | |
| 866 | #[test ] |
| 867 | fn test_remove_set() { |
| 868 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
| 869 | let remove = |
| 870 | CodePointInversionList::try_from_inversion_list_slice(&[0xA, 0x14, 0x2D, 0x4B]) |
| 871 | .unwrap(); |
| 872 | builder.remove_set(&remove); |
| 873 | let expected = [0x28, 0x2D, 0x4B, 0x50]; |
| 874 | assert_eq!(builder.intervals, expected); |
| 875 | } |
| 876 | |
| 877 | #[test ] |
| 878 | fn test_retain_char() { |
| 879 | let mut builder = generate_tester(&[0x41, 0x5A]); |
| 880 | builder.retain_char('A' ); // 65 |
| 881 | let expected = [0x41, 0x42]; |
| 882 | assert_eq!(builder.intervals, expected); |
| 883 | } |
| 884 | |
| 885 | #[test ] |
| 886 | fn test_retain_range() { |
| 887 | let mut builder = generate_tester(&[0x41, 0x5A]); |
| 888 | builder.retain_range(&('C' ..'F' )); // 67 - 70 |
| 889 | let expected = [0x43, 0x46]; |
| 890 | assert_eq!(builder.intervals, expected); |
| 891 | } |
| 892 | |
| 893 | #[test ] |
| 894 | fn test_retain_range_empty() { |
| 895 | let mut builder = generate_tester(&[0x41, 0x46]); |
| 896 | builder.retain_range(&('F' ..'Z' )); |
| 897 | assert!(builder.intervals.is_empty()); |
| 898 | } |
| 899 | |
| 900 | #[test ] |
| 901 | fn test_retain_set() { |
| 902 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32, 70, 80]); |
| 903 | let retain = CodePointInversionList::try_from_inversion_list_slice(&[ |
| 904 | 0xE, 0x14, 0x19, 0x37, 0x4D, 0x51, |
| 905 | ]) |
| 906 | .unwrap(); |
| 907 | builder.retain_set(&retain); |
| 908 | let expected = [0xE, 0x14, 0x28, 0x32, 0x4D, 0x50]; |
| 909 | assert_eq!(builder.intervals, expected); |
| 910 | } |
| 911 | |
| 912 | #[test ] |
| 913 | fn test_complement() { |
| 914 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 915 | builder.complement(); |
| 916 | let expected = [0x0, 0xA, 0x14, 0x28, 0x32, (char::MAX as u32) + 1]; |
| 917 | assert_eq!(builder.intervals, expected); |
| 918 | } |
| 919 | |
| 920 | #[test ] |
| 921 | fn test_complement_empty() { |
| 922 | let mut builder = generate_tester(&[]); |
| 923 | builder.complement(); |
| 924 | let expected = [0x0, (char::MAX as u32) + 1]; |
| 925 | assert_eq!(builder.intervals, expected); |
| 926 | |
| 927 | builder.complement(); |
| 928 | let expected: [u32; 0] = []; |
| 929 | assert_eq!(builder.intervals, expected); |
| 930 | } |
| 931 | |
| 932 | #[test ] |
| 933 | fn test_complement_zero_max() { |
| 934 | let mut builder = generate_tester(&[0x0, 0xA, 0x5A, (char::MAX as u32) + 1]); |
| 935 | builder.complement(); |
| 936 | let expected = [0xA, 0x5A]; |
| 937 | assert_eq!(builder.intervals, expected); |
| 938 | } |
| 939 | |
| 940 | #[test ] |
| 941 | fn test_complement_interior() { |
| 942 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 943 | builder.complement_list([0xE, 0x14].iter().copied()); |
| 944 | let expected = [0xA, 0xE, 0x28, 0x32]; |
| 945 | assert_eq!(builder.intervals, expected); |
| 946 | } |
| 947 | |
| 948 | #[test ] |
| 949 | fn test_complement_exterior() { |
| 950 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 951 | builder.complement_list([0x19, 0x23].iter().copied()); |
| 952 | let expected = [0xA, 0x14, 0x19, 0x23, 0x28, 0x32]; |
| 953 | assert_eq!(builder.intervals, expected); |
| 954 | } |
| 955 | |
| 956 | #[test ] |
| 957 | fn test_complement_larger_list() { |
| 958 | let mut builder = generate_tester(&[0xA, 0x14, 0x28, 0x32]); |
| 959 | builder.complement_list([0x1E, 0x37, 0x3C, 0x46].iter().copied()); |
| 960 | let expected = [0xA, 0x14, 0x1E, 0x28, 0x32, 0x37, 0x3C, 0x46]; |
| 961 | assert_eq!(builder.intervals, expected); |
| 962 | } |
| 963 | |
| 964 | #[test ] |
| 965 | fn test_complement_char() { |
| 966 | let mut builder = generate_tester(&[0x41, 0x4C]); // A - K |
| 967 | builder.complement_char('A' ); |
| 968 | builder.complement_char('L' ); |
| 969 | let expected = [0x42, 0x4D]; |
| 970 | assert_eq!(builder.intervals, expected); |
| 971 | } |
| 972 | |
| 973 | #[test ] |
| 974 | fn test_complement_range() { |
| 975 | let mut builder = generate_tester(&[0x46, 0x4C]); // F - K |
| 976 | builder.complement_range(&('A' ..='Z' )); |
| 977 | let expected = [0x41, 0x46, 0x4C, 0x5B]; |
| 978 | assert_eq!(builder.intervals, expected); |
| 979 | } |
| 980 | |
| 981 | #[test ] |
| 982 | fn test_complement_set() { |
| 983 | let mut builder = generate_tester(&[0x43, 0x4E]); |
| 984 | let set = CodePointInversionList::try_from_inversion_list_slice(&[0x41, 0x46, 0x4B, 0x5A]) |
| 985 | .unwrap(); |
| 986 | builder.complement_set(&set); |
| 987 | let expected = [0x41, 0x43, 0x46, 0x4B, 0x4E, 0x5A]; |
| 988 | assert_eq!(builder.intervals, expected); |
| 989 | } |
| 990 | |
| 991 | #[test ] |
| 992 | fn test_is_empty() { |
| 993 | let builder = CodePointInversionListBuilder::new(); |
| 994 | assert!(builder.is_empty()); |
| 995 | } |
| 996 | } |
| 997 | |