| 1 | use core::{char, cmp, fmt::Debug, slice}; | 
| 2 |  | 
|---|
| 3 | use alloc::vec::Vec; | 
|---|
| 4 |  | 
|---|
| 5 | use crate::unicode; | 
|---|
| 6 |  | 
|---|
| 7 | // This module contains an *internal* implementation of interval sets. | 
|---|
| 8 | // | 
|---|
| 9 | // The primary invariant that interval sets guard is canonical ordering. That | 
|---|
| 10 | // is, every interval set contains an ordered sequence of intervals where | 
|---|
| 11 | // no two intervals are overlapping or adjacent. While this invariant is | 
|---|
| 12 | // occasionally broken within the implementation, it should be impossible for | 
|---|
| 13 | // callers to observe it. | 
|---|
| 14 | // | 
|---|
| 15 | // Since case folding (as implemented below) breaks that invariant, we roll | 
|---|
| 16 | // that into this API even though it is a little out of place in an otherwise | 
|---|
| 17 | // generic interval set. (Hence the reason why the `unicode` module is imported | 
|---|
| 18 | // here.) | 
|---|
| 19 | // | 
|---|
| 20 | // Some of the implementation complexity here is a result of me wanting to | 
|---|
| 21 | // preserve the sequential representation without using additional memory. | 
|---|
| 22 | // In many cases, we do use linear extra memory, but it is at most 2x and it | 
|---|
| 23 | // is amortized. If we relaxed the memory requirements, this implementation | 
|---|
| 24 | // could become much simpler. The extra memory is honestly probably OK, but | 
|---|
| 25 | // character classes (especially of the Unicode variety) can become quite | 
|---|
| 26 | // large, and it would be nice to keep regex compilation snappy even in debug | 
|---|
| 27 | // builds. (In the past, I have been careless with this area of code and it has | 
|---|
| 28 | // caused slow regex compilations in debug mode, so this isn't entirely | 
|---|
| 29 | // unwarranted.) | 
|---|
| 30 | // | 
|---|
| 31 | // Tests on this are relegated to the public API of HIR in src/hir.rs. | 
|---|
| 32 |  | 
|---|
/// A set of intervals kept as a sequence of ranges, together with a hint
/// recording whether the set is known to be closed under simple case folding.
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
    /// The ranges making up this set: sorted and non-overlapping.
    ranges: Vec<I>,
    /// A hint that this set has already been case folded. It plays no role
    /// in correctness; it only lets us skip redundant work when a set that
    /// is already case folded is asked to fold again.
    ///
    /// The flag survives the pairwise set operations: when both operands are
    /// case folded, their union, intersection, difference and symmetric
    /// difference are case folded as well.
    ///
    /// The flag is one-sided. `true` means the set *is* case folded. `false`
    /// means only that we do not know: the set may still happen to be case
    /// folded, because we set the flag to `true` only when we know it to be
    /// the case and leave it `false` whenever finding out would be costly.
    /// Code must therefore never treat `false` as proof that the set is not
    /// case folded.
    ///
    /// In short: this is purely a performance optimization.
    folded: bool,
}
|---|
| 55 |  | 
|---|
| 56 | impl<I: Interval> Eq for IntervalSet<I> {} | 
|---|
| 57 |  | 
|---|
| 58 | // We implement PartialEq manually so that we don't consider the set's internal | 
|---|
| 59 | // 'folded' property to be part of its identity. The 'folded' property is | 
|---|
| 60 | // strictly an optimization. | 
|---|
| 61 | impl<I: Interval> PartialEq for IntervalSet<I> { | 
|---|
| 62 | fn eq(&self, other: &IntervalSet<I>) -> bool { | 
|---|
| 63 | self.ranges.eq(&other.ranges) | 
|---|
| 64 | } | 
|---|
| 65 | } | 
|---|
| 66 |  | 
|---|
| 67 | impl<I: Interval> IntervalSet<I> { | 
|---|
| 68 | /// Create a new set from a sequence of intervals. Each interval is | 
|---|
| 69 | /// specified as a pair of bounds, where both bounds are inclusive. | 
|---|
| 70 | /// | 
|---|
| 71 | /// The given ranges do not need to be in any specific order, and ranges | 
|---|
| 72 | /// may overlap. | 
|---|
| 73 | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 
|---|
| 74 | let ranges: Vec<I> = intervals.into_iter().collect(); | 
|---|
| 75 | // An empty set is case folded. | 
|---|
| 76 | let folded = ranges.is_empty(); | 
|---|
| 77 | let mut set = IntervalSet { ranges, folded }; | 
|---|
| 78 | set.canonicalize(); | 
|---|
| 79 | set | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | /// Add a new interval to this set. | 
|---|
| 83 | pub fn push(&mut self, interval: I) { | 
|---|
| 84 | // TODO: This could be faster. e.g., Push the interval such that | 
|---|
| 85 | // it preserves canonicalization. | 
|---|
| 86 | self.ranges.push(interval); | 
|---|
| 87 | self.canonicalize(); | 
|---|
| 88 | // We don't know whether the new interval added here is considered | 
|---|
| 89 | // case folded, so we conservatively assume that the entire set is | 
|---|
| 90 | // no longer case folded if it was previously. | 
|---|
| 91 | self.folded = false; | 
|---|
| 92 | } | 
|---|
| 93 |  | 
|---|
| 94 | /// Return an iterator over all intervals in this set. | 
|---|
| 95 | /// | 
|---|
| 96 | /// The iterator yields intervals in ascending order. | 
|---|
| 97 | pub fn iter(&self) -> IntervalSetIter<'_, I> { | 
|---|
| 98 | IntervalSetIter(self.ranges.iter()) | 
|---|
| 99 | } | 
|---|
| 100 |  | 
|---|
| 101 | /// Return an immutable slice of intervals in this set. | 
|---|
| 102 | /// | 
|---|
| 103 | /// The sequence returned is in canonical ordering. | 
|---|
| 104 | pub fn intervals(&self) -> &[I] { | 
|---|
| 105 | &self.ranges | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | /// Expand this interval set such that it contains all case folded | 
|---|
| 109 | /// characters. For example, if this class consists of the range `a-z`, | 
|---|
| 110 | /// then applying case folding will result in the class containing both the | 
|---|
| 111 | /// ranges `a-z` and `A-Z`. | 
|---|
| 112 | /// | 
|---|
| 113 | /// This returns an error if the necessary case mapping data is not | 
|---|
| 114 | /// available. | 
|---|
| 115 | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { | 
|---|
| 116 | if self.folded { | 
|---|
| 117 | return Ok(()); | 
|---|
| 118 | } | 
|---|
| 119 | let len = self.ranges.len(); | 
|---|
| 120 | for i in 0..len { | 
|---|
| 121 | let range = self.ranges[i]; | 
|---|
| 122 | if let Err(err) = range.case_fold_simple(&mut self.ranges) { | 
|---|
| 123 | self.canonicalize(); | 
|---|
| 124 | return Err(err); | 
|---|
| 125 | } | 
|---|
| 126 | } | 
|---|
| 127 | self.canonicalize(); | 
|---|
| 128 | self.folded = true; | 
|---|
| 129 | Ok(()) | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | /// Union this set with the given set, in place. | 
|---|
| 133 | pub fn union(&mut self, other: &IntervalSet<I>) { | 
|---|
| 134 | if other.ranges.is_empty() || self.ranges == other.ranges { | 
|---|
| 135 | return; | 
|---|
| 136 | } | 
|---|
| 137 | // This could almost certainly be done more efficiently. | 
|---|
| 138 | self.ranges.extend(&other.ranges); | 
|---|
| 139 | self.canonicalize(); | 
|---|
| 140 | self.folded = self.folded && other.folded; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | /// Intersect this set with the given set, in place. | 
|---|
| 144 | pub fn intersect(&mut self, other: &IntervalSet<I>) { | 
|---|
| 145 | if self.ranges.is_empty() { | 
|---|
| 146 | return; | 
|---|
| 147 | } | 
|---|
| 148 | if other.ranges.is_empty() { | 
|---|
| 149 | self.ranges.clear(); | 
|---|
| 150 | // An empty set is case folded. | 
|---|
| 151 | self.folded = true; | 
|---|
| 152 | return; | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | // There should be a way to do this in-place with constant memory, | 
|---|
| 156 | // but I couldn't figure out a simple way to do it. So just append | 
|---|
| 157 | // the intersection to the end of this range, and then drain it before | 
|---|
| 158 | // we're done. | 
|---|
| 159 | let drain_end = self.ranges.len(); | 
|---|
| 160 |  | 
|---|
| 161 | let mut ita = 0..drain_end; | 
|---|
| 162 | let mut itb = 0..other.ranges.len(); | 
|---|
| 163 | let mut a = ita.next().unwrap(); | 
|---|
| 164 | let mut b = itb.next().unwrap(); | 
|---|
| 165 | loop { | 
|---|
| 166 | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { | 
|---|
| 167 | self.ranges.push(ab); | 
|---|
| 168 | } | 
|---|
| 169 | let (it, aorb) = | 
|---|
| 170 | if self.ranges[a].upper() < other.ranges[b].upper() { | 
|---|
| 171 | (&mut ita, &mut a) | 
|---|
| 172 | } else { | 
|---|
| 173 | (&mut itb, &mut b) | 
|---|
| 174 | }; | 
|---|
| 175 | match it.next() { | 
|---|
| 176 | Some(v) => *aorb = v, | 
|---|
| 177 | None => break, | 
|---|
| 178 | } | 
|---|
| 179 | } | 
|---|
| 180 | self.ranges.drain(..drain_end); | 
|---|
| 181 | self.folded = self.folded && other.folded; | 
|---|
| 182 | } | 
|---|
| 183 |  | 
|---|
| 184 | /// Subtract the given set from this set, in place. | 
|---|
| 185 | pub fn difference(&mut self, other: &IntervalSet<I>) { | 
|---|
| 186 | if self.ranges.is_empty() || other.ranges.is_empty() { | 
|---|
| 187 | return; | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | // This algorithm is (to me) surprisingly complex. A search of the | 
|---|
| 191 | // interwebs indicate that this is a potentially interesting problem. | 
|---|
| 192 | // Folks seem to suggest interval or segment trees, but I'd like to | 
|---|
| 193 | // avoid the overhead (both runtime and conceptual) of that. | 
|---|
| 194 | // | 
|---|
| 195 | // The following is basically my Shitty First Draft. Therefore, in | 
|---|
| 196 | // order to grok it, you probably need to read each line carefully. | 
|---|
| 197 | // Simplifications are most welcome! | 
|---|
| 198 | // | 
|---|
| 199 | // Remember, we can assume the canonical format invariant here, which | 
|---|
| 200 | // says that all ranges are sorted, not overlapping and not adjacent in | 
|---|
| 201 | // each class. | 
|---|
| 202 | let drain_end = self.ranges.len(); | 
|---|
| 203 | let (mut a, mut b) = (0, 0); | 
|---|
| 204 | 'LOOP: while a < drain_end && b < other.ranges.len() { | 
|---|
| 205 | // Basically, the easy cases are when neither range overlaps with | 
|---|
| 206 | // each other. If the `b` range is less than our current `a` | 
|---|
| 207 | // range, then we can skip it and move on. | 
|---|
| 208 | if other.ranges[b].upper() < self.ranges[a].lower() { | 
|---|
| 209 | b += 1; | 
|---|
| 210 | continue; | 
|---|
| 211 | } | 
|---|
| 212 | // ... similarly for the `a` range. If it's less than the smallest | 
|---|
| 213 | // `b` range, then we can add it as-is. | 
|---|
| 214 | if self.ranges[a].upper() < other.ranges[b].lower() { | 
|---|
| 215 | let range = self.ranges[a]; | 
|---|
| 216 | self.ranges.push(range); | 
|---|
| 217 | a += 1; | 
|---|
| 218 | continue; | 
|---|
| 219 | } | 
|---|
| 220 | // Otherwise, we have overlapping ranges. | 
|---|
| 221 | assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); | 
|---|
| 222 |  | 
|---|
| 223 | // This part is tricky and was non-obvious to me without looking | 
|---|
| 224 | // at explicit examples (see the tests). The trickiness stems from | 
|---|
| 225 | // two things: 1) subtracting a range from another range could | 
|---|
| 226 | // yield two ranges and 2) after subtracting a range, it's possible | 
|---|
| 227 | // that future ranges can have an impact. The loop below advances | 
|---|
| 228 | // the `b` ranges until they can't possible impact the current | 
|---|
| 229 | // range. | 
|---|
| 230 | // | 
|---|
| 231 | // For example, if our `a` range is `a-t` and our next three `b` | 
|---|
| 232 | // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply | 
|---|
| 233 | // subtraction three times before moving on to the next `a` range. | 
|---|
| 234 | let mut range = self.ranges[a]; | 
|---|
| 235 | while b < other.ranges.len() | 
|---|
| 236 | && !range.is_intersection_empty(&other.ranges[b]) | 
|---|
| 237 | { | 
|---|
| 238 | let old_range = range; | 
|---|
| 239 | range = match range.difference(&other.ranges[b]) { | 
|---|
| 240 | (None, None) => { | 
|---|
| 241 | // We lost the entire range, so move on to the next | 
|---|
| 242 | // without adding this one. | 
|---|
| 243 | a += 1; | 
|---|
| 244 | continue 'LOOP; | 
|---|
| 245 | } | 
|---|
| 246 | (Some(range1), None) | (None, Some(range1)) => range1, | 
|---|
| 247 | (Some(range1), Some(range2)) => { | 
|---|
| 248 | self.ranges.push(range1); | 
|---|
| 249 | range2 | 
|---|
| 250 | } | 
|---|
| 251 | }; | 
|---|
| 252 | // It's possible that the `b` range has more to contribute | 
|---|
| 253 | // here. In particular, if it is greater than the original | 
|---|
| 254 | // range, then it might impact the next `a` range *and* it | 
|---|
| 255 | // has impacted the current `a` range as much as possible, | 
|---|
| 256 | // so we can quit. We don't bump `b` so that the next `a` | 
|---|
| 257 | // range can apply it. | 
|---|
| 258 | if other.ranges[b].upper() > old_range.upper() { | 
|---|
| 259 | break; | 
|---|
| 260 | } | 
|---|
| 261 | // Otherwise, the next `b` range might apply to the current | 
|---|
| 262 | // `a` range. | 
|---|
| 263 | b += 1; | 
|---|
| 264 | } | 
|---|
| 265 | self.ranges.push(range); | 
|---|
| 266 | a += 1; | 
|---|
| 267 | } | 
|---|
| 268 | while a < drain_end { | 
|---|
| 269 | let range = self.ranges[a]; | 
|---|
| 270 | self.ranges.push(range); | 
|---|
| 271 | a += 1; | 
|---|
| 272 | } | 
|---|
| 273 | self.ranges.drain(..drain_end); | 
|---|
| 274 | self.folded = self.folded && other.folded; | 
|---|
| 275 | } | 
|---|
| 276 |  | 
|---|
| 277 | /// Compute the symmetric difference of the two sets, in place. | 
|---|
| 278 | /// | 
|---|
| 279 | /// This computes the symmetric difference of two interval sets. This | 
|---|
| 280 | /// removes all elements in this set that are also in the given set, | 
|---|
| 281 | /// but also adds all elements from the given set that aren't in this | 
|---|
| 282 | /// set. That is, the set will contain all elements in either set, | 
|---|
| 283 | /// but will not contain any elements that are in both sets. | 
|---|
| 284 | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { | 
|---|
| 285 | // TODO(burntsushi): Fix this so that it amortizes allocation. | 
|---|
| 286 | let mut intersection = self.clone(); | 
|---|
| 287 | intersection.intersect(other); | 
|---|
| 288 | self.union(other); | 
|---|
| 289 | self.difference(&intersection); | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
| 292 | /// Negate this interval set. | 
|---|
| 293 | /// | 
|---|
| 294 | /// For all `x` where `x` is any element, if `x` was in this set, then it | 
|---|
| 295 | /// will not be in this set after negation. | 
|---|
| 296 | pub fn negate(&mut self) { | 
|---|
| 297 | if self.ranges.is_empty() { | 
|---|
| 298 | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); | 
|---|
| 299 | self.ranges.push(I::create(min, max)); | 
|---|
| 300 | // The set containing everything must case folded. | 
|---|
| 301 | self.folded = true; | 
|---|
| 302 | return; | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
| 305 | // There should be a way to do this in-place with constant memory, | 
|---|
| 306 | // but I couldn't figure out a simple way to do it. So just append | 
|---|
| 307 | // the negation to the end of this range, and then drain it before | 
|---|
| 308 | // we're done. | 
|---|
| 309 | let drain_end = self.ranges.len(); | 
|---|
| 310 |  | 
|---|
| 311 | // We do checked arithmetic below because of the canonical ordering | 
|---|
| 312 | // invariant. | 
|---|
| 313 | if self.ranges[0].lower() > I::Bound::min_value() { | 
|---|
| 314 | let upper = self.ranges[0].lower().decrement(); | 
|---|
| 315 | self.ranges.push(I::create(I::Bound::min_value(), upper)); | 
|---|
| 316 | } | 
|---|
| 317 | for i in 1..drain_end { | 
|---|
| 318 | let lower = self.ranges[i - 1].upper().increment(); | 
|---|
| 319 | let upper = self.ranges[i].lower().decrement(); | 
|---|
| 320 | self.ranges.push(I::create(lower, upper)); | 
|---|
| 321 | } | 
|---|
| 322 | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { | 
|---|
| 323 | let lower = self.ranges[drain_end - 1].upper().increment(); | 
|---|
| 324 | self.ranges.push(I::create(lower, I::Bound::max_value())); | 
|---|
| 325 | } | 
|---|
| 326 | self.ranges.drain(..drain_end); | 
|---|
| 327 | // We don't need to update whether this set is folded or not, because | 
|---|
| 328 | // it is conservatively preserved through negation. Namely, if a set | 
|---|
| 329 | // is not folded, then it is possible that its negation is folded, for | 
|---|
| 330 | // example, [^☃]. But we're fine with assuming that the set is not | 
|---|
| 331 | // folded in that case. (`folded` permits false negatives but not false | 
|---|
| 332 | // positives.) | 
|---|
| 333 | // | 
|---|
| 334 | // But what about when a set is folded, is its negation also | 
|---|
| 335 | // necessarily folded? Yes. Because if a set is folded, then for every | 
|---|
| 336 | // character in the set, it necessarily included its equivalence class | 
|---|
| 337 | // of case folded characters. Negating it in turn means that all | 
|---|
| 338 | // equivalence classes in the set are negated, and any equivalence | 
|---|
| 339 | // class that was previously not in the set is now entirely in the set. | 
|---|
| 340 | } | 
|---|
| 341 |  | 
|---|
| 342 | /// Converts this set into a canonical ordering. | 
|---|
| 343 | fn canonicalize(&mut self) { | 
|---|
| 344 | if self.is_canonical() { | 
|---|
| 345 | return; | 
|---|
| 346 | } | 
|---|
| 347 | self.ranges.sort(); | 
|---|
| 348 | assert!(!self.ranges.is_empty()); | 
|---|
| 349 |  | 
|---|
| 350 | // Is there a way to do this in-place with constant memory? I couldn't | 
|---|
| 351 | // figure out a way to do it. So just append the canonicalization to | 
|---|
| 352 | // the end of this range, and then drain it before we're done. | 
|---|
| 353 | let drain_end = self.ranges.len(); | 
|---|
| 354 | for oldi in 0..drain_end { | 
|---|
| 355 | // If we've added at least one new range, then check if we can | 
|---|
| 356 | // merge this range in the previously added range. | 
|---|
| 357 | if self.ranges.len() > drain_end { | 
|---|
| 358 | let (last, rest) = self.ranges.split_last_mut().unwrap(); | 
|---|
| 359 | if let Some(union) = last.union(&rest[oldi]) { | 
|---|
| 360 | *last = union; | 
|---|
| 361 | continue; | 
|---|
| 362 | } | 
|---|
| 363 | } | 
|---|
| 364 | let range = self.ranges[oldi]; | 
|---|
| 365 | self.ranges.push(range); | 
|---|
| 366 | } | 
|---|
| 367 | self.ranges.drain(..drain_end); | 
|---|
| 368 | } | 
|---|
| 369 |  | 
|---|
| 370 | /// Returns true if and only if this class is in a canonical ordering. | 
|---|
| 371 | fn is_canonical(&self) -> bool { | 
|---|
| 372 | for pair in self.ranges.windows(2) { | 
|---|
| 373 | if pair[0] >= pair[1] { | 
|---|
| 374 | return false; | 
|---|
| 375 | } | 
|---|
| 376 | if pair[0].is_contiguous(&pair[1]) { | 
|---|
| 377 | return false; | 
|---|
| 378 | } | 
|---|
| 379 | } | 
|---|
| 380 | true | 
|---|
| 381 | } | 
|---|
| 382 | } | 
|---|
| 383 |  | 
|---|
/// An iterator over the intervals of an interval set.
///
/// This is a thin wrapper around a slice iterator; it yields shared
/// references to intervals in the order they are stored.
#[derive(Debug)]
pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);
|---|
| 387 |  | 
|---|
| 388 | impl<'a, I> Iterator for IntervalSetIter<'a, I> { | 
|---|
| 389 | type Item = &'a I; | 
|---|
| 390 |  | 
|---|
| 391 | fn next(&mut self) -> Option<&'a I> { | 
|---|
| 392 | self.0.next() | 
|---|
| 393 | } | 
|---|
| 394 | } | 
|---|
| 395 |  | 
|---|
| 396 | pub trait Interval: | 
|---|
| 397 | Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord | 
|---|
| 398 | { | 
|---|
| 399 | type Bound: Bound; | 
|---|
| 400 |  | 
|---|
| 401 | fn lower(&self) -> Self::Bound; | 
|---|
| 402 | fn upper(&self) -> Self::Bound; | 
|---|
| 403 | fn set_lower(&mut self, bound: Self::Bound); | 
|---|
| 404 | fn set_upper(&mut self, bound: Self::Bound); | 
|---|
| 405 | fn case_fold_simple( | 
|---|
| 406 | &self, | 
|---|
| 407 | intervals: &mut Vec<Self>, | 
|---|
| 408 | ) -> Result<(), unicode::CaseFoldError>; | 
|---|
| 409 |  | 
|---|
| 410 | /// Create a new interval. | 
|---|
| 411 | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { | 
|---|
| 412 | let mut int = Self::default(); | 
|---|
| 413 | if lower <= upper { | 
|---|
| 414 | int.set_lower(lower); | 
|---|
| 415 | int.set_upper(upper); | 
|---|
| 416 | } else { | 
|---|
| 417 | int.set_lower(upper); | 
|---|
| 418 | int.set_upper(lower); | 
|---|
| 419 | } | 
|---|
| 420 | int | 
|---|
| 421 | } | 
|---|
| 422 |  | 
|---|
| 423 | /// Union the given overlapping range into this range. | 
|---|
| 424 | /// | 
|---|
| 425 | /// If the two ranges aren't contiguous, then this returns `None`. | 
|---|
| 426 | fn union(&self, other: &Self) -> Option<Self> { | 
|---|
| 427 | if !self.is_contiguous(other) { | 
|---|
| 428 | return None; | 
|---|
| 429 | } | 
|---|
| 430 | let lower = cmp::min(self.lower(), other.lower()); | 
|---|
| 431 | let upper = cmp::max(self.upper(), other.upper()); | 
|---|
| 432 | Some(Self::create(lower, upper)) | 
|---|
| 433 | } | 
|---|
| 434 |  | 
|---|
| 435 | /// Intersect this range with the given range and return the result. | 
|---|
| 436 | /// | 
|---|
| 437 | /// If the intersection is empty, then this returns `None`. | 
|---|
| 438 | fn intersect(&self, other: &Self) -> Option<Self> { | 
|---|
| 439 | let lower = cmp::max(self.lower(), other.lower()); | 
|---|
| 440 | let upper = cmp::min(self.upper(), other.upper()); | 
|---|
| 441 | if lower <= upper { | 
|---|
| 442 | Some(Self::create(lower, upper)) | 
|---|
| 443 | } else { | 
|---|
| 444 | None | 
|---|
| 445 | } | 
|---|
| 446 | } | 
|---|
| 447 |  | 
|---|
| 448 | /// Subtract the given range from this range and return the resulting | 
|---|
| 449 | /// ranges. | 
|---|
| 450 | /// | 
|---|
| 451 | /// If subtraction would result in an empty range, then no ranges are | 
|---|
| 452 | /// returned. | 
|---|
| 453 | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { | 
|---|
| 454 | if self.is_subset(other) { | 
|---|
| 455 | return (None, None); | 
|---|
| 456 | } | 
|---|
| 457 | if self.is_intersection_empty(other) { | 
|---|
| 458 | return (Some(self.clone()), None); | 
|---|
| 459 | } | 
|---|
| 460 | let add_lower = other.lower() > self.lower(); | 
|---|
| 461 | let add_upper = other.upper() < self.upper(); | 
|---|
| 462 | // We know this because !self.is_subset(other) and the ranges have | 
|---|
| 463 | // a non-empty intersection. | 
|---|
| 464 | assert!(add_lower || add_upper); | 
|---|
| 465 | let mut ret = (None, None); | 
|---|
| 466 | if add_lower { | 
|---|
| 467 | let upper = other.lower().decrement(); | 
|---|
| 468 | ret.0 = Some(Self::create(self.lower(), upper)); | 
|---|
| 469 | } | 
|---|
| 470 | if add_upper { | 
|---|
| 471 | let lower = other.upper().increment(); | 
|---|
| 472 | let range = Self::create(lower, self.upper()); | 
|---|
| 473 | if ret.0.is_none() { | 
|---|
| 474 | ret.0 = Some(range); | 
|---|
| 475 | } else { | 
|---|
| 476 | ret.1 = Some(range); | 
|---|
| 477 | } | 
|---|
| 478 | } | 
|---|
| 479 | ret | 
|---|
| 480 | } | 
|---|
| 481 |  | 
|---|
| 482 | /// Returns true if and only if the two ranges are contiguous. Two ranges | 
|---|
| 483 | /// are contiguous if and only if the ranges are either overlapping or | 
|---|
| 484 | /// adjacent. | 
|---|
| 485 | fn is_contiguous(&self, other: &Self) -> bool { | 
|---|
| 486 | let lower1 = self.lower().as_u32(); | 
|---|
| 487 | let upper1 = self.upper().as_u32(); | 
|---|
| 488 | let lower2 = other.lower().as_u32(); | 
|---|
| 489 | let upper2 = other.upper().as_u32(); | 
|---|
| 490 | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) | 
|---|
| 491 | } | 
|---|
| 492 |  | 
|---|
| 493 | /// Returns true if and only if the intersection of this range and the | 
|---|
| 494 | /// other range is empty. | 
|---|
| 495 | fn is_intersection_empty(&self, other: &Self) -> bool { | 
|---|
| 496 | let (lower1, upper1) = (self.lower(), self.upper()); | 
|---|
| 497 | let (lower2, upper2) = (other.lower(), other.upper()); | 
|---|
| 498 | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) | 
|---|
| 499 | } | 
|---|
| 500 |  | 
|---|
| 501 | /// Returns true if and only if this range is a subset of the other range. | 
|---|
| 502 | fn is_subset(&self, other: &Self) -> bool { | 
|---|
| 503 | let (lower1, upper1) = (self.lower(), self.upper()); | 
|---|
| 504 | let (lower2, upper2) = (other.lower(), other.upper()); | 
|---|
| 505 | (lower2 <= lower1 && lower1 <= upper2) | 
|---|
| 506 | && (lower2 <= upper1 && upper1 <= upper2) | 
|---|
| 507 | } | 
|---|
| 508 | } | 
|---|
| 509 |  | 
|---|
/// An endpoint of an interval: a totally ordered, copyable value with a
/// smallest and a largest element and single-step navigation between
/// neighbouring values.
pub trait Bound:
    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
{
    /// The smallest value of this bound type.
    fn min_value() -> Self;
    /// The largest value of this bound type.
    fn max_value() -> Self;
    /// This bound as a `u32`.
    fn as_u32(self) -> u32;
    /// The next value after this one.
    fn increment(self) -> Self;
    /// The value just before this one.
    fn decrement(self) -> Self;
}
|---|
| 519 |  | 
|---|
| 520 | impl Bound for u8 { | 
|---|
| 521 | fn min_value() -> Self { | 
|---|
| 522 | u8::MIN | 
|---|
| 523 | } | 
|---|
| 524 | fn max_value() -> Self { | 
|---|
| 525 | u8::MAX | 
|---|
| 526 | } | 
|---|
| 527 | fn as_u32(self) -> u32 { | 
|---|
| 528 | u32::from(self) | 
|---|
| 529 | } | 
|---|
| 530 | fn increment(self) -> Self { | 
|---|
| 531 | self.checked_add(1).unwrap() | 
|---|
| 532 | } | 
|---|
| 533 | fn decrement(self) -> Self { | 
|---|
| 534 | self.checked_sub(1).unwrap() | 
|---|
| 535 | } | 
|---|
| 536 | } | 
|---|
| 537 |  | 
|---|
| 538 | impl Bound for char { | 
|---|
| 539 | fn min_value() -> Self { | 
|---|
| 540 | '\x00 ' | 
|---|
| 541 | } | 
|---|
| 542 | fn max_value() -> Self { | 
|---|
| 543 | '\u{10FFFF} ' | 
|---|
| 544 | } | 
|---|
| 545 | fn as_u32(self) -> u32 { | 
|---|
| 546 | u32::from(self) | 
|---|
| 547 | } | 
|---|
| 548 |  | 
|---|
| 549 | fn increment(self) -> Self { | 
|---|
| 550 | match self { | 
|---|
| 551 | '\u{D7FF} '=> '\u{E000} ', | 
|---|
| 552 | c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(), | 
|---|
| 553 | } | 
|---|
| 554 | } | 
|---|
| 555 |  | 
|---|
| 556 | fn decrement(self) -> Self { | 
|---|
| 557 | match self { | 
|---|
| 558 | '\u{E000} '=> '\u{D7FF} ', | 
|---|
| 559 | c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(), | 
|---|
| 560 | } | 
|---|
| 561 | } | 
|---|
| 562 | } | 
|---|
| 563 |  | 
|---|
| 564 | // Tests for interval sets are written in src/hir.rs against the public API. | 
|---|
| 565 |  | 
|---|