1use core::convert::TryFrom;
2
3use crate::util::{
4 bytes::{DeserializeError, SerializeError},
5 DebugByte,
6};
7
8/// Unit represents a single unit of input for DFA based regex engines.
9///
10/// **NOTE:** It is not expected for consumers of this crate to need to use
11/// this type unless they are implementing their own DFA. And even then, it's
12/// not required: implementors may use other techniques to handle input.
13///
14/// Typically, a single unit of input for a DFA would be a single byte.
15/// However, for the DFAs in this crate, matches are delayed by a single byte
16/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once
17/// we have consumed the haystack, we must run the DFA through one additional
18/// transition using an input that indicates the haystack has ended.
19///
20/// Since there is no way to represent a sentinel with a `u8` since all
21/// possible values *may* be valid inputs to a DFA, this type explicitly adds
22/// room for a sentinel value.
23///
24/// The sentinel EOI value is always its own equivalence class and is
25/// ultimately represented by adding 1 to the maximum equivalence class value.
26/// So for example, the regex `^[a-z]+$` might be split into the following
27/// equivalence classes:
28///
29/// ```text
30/// 0 => [\x00-`]
31/// 1 => [a-z]
32/// 2 => [{-\xFF]
33/// 3 => [EOI]
34/// ```
35///
36/// Where EOI is the special sentinel value that is always in its own
37/// singleton equivalence class.
38#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
39pub enum Unit {
40 U8(u8),
41 EOI(u16),
42}
43
44impl Unit {
45 /// Create a new input unit from a byte value.
46 ///
47 /// All possible byte values are legal. However, when creating an input
48 /// unit for a specific DFA, one should be careful to only construct input
49 /// units that are in that DFA's alphabet. Namely, one way to compact a
50 /// DFA's in-memory representation is to collapse its transitions to a set
51 /// of equivalence classes into a set of all possible byte values. If a
52 /// DFA uses equivalence classes instead of byte values, then the byte
53 /// given here should be the equivalence class.
54 pub fn u8(byte: u8) -> Unit {
55 Unit::U8(byte)
56 }
57
58 pub fn eoi(num_byte_equiv_classes: usize) -> Unit {
59 assert!(
60 num_byte_equiv_classes <= 256,
61 "max number of byte-based equivalent classes is 256, but got {}",
62 num_byte_equiv_classes,
63 );
64 Unit::EOI(u16::try_from(num_byte_equiv_classes).unwrap())
65 }
66
67 pub fn as_u8(self) -> Option<u8> {
68 match self {
69 Unit::U8(b) => Some(b),
70 Unit::EOI(_) => None,
71 }
72 }
73
74 #[cfg(feature = "alloc")]
75 pub fn as_eoi(self) -> Option<usize> {
76 match self {
77 Unit::U8(_) => None,
78 Unit::EOI(eoi) => Some(eoi as usize),
79 }
80 }
81
82 pub fn as_usize(self) -> usize {
83 match self {
84 Unit::U8(b) => b as usize,
85 Unit::EOI(eoi) => eoi as usize,
86 }
87 }
88
89 pub fn is_eoi(&self) -> bool {
90 match *self {
91 Unit::EOI(_) => true,
92 _ => false,
93 }
94 }
95
96 #[cfg(feature = "alloc")]
97 pub fn is_word_byte(&self) -> bool {
98 self.as_u8().map_or(false, crate::util::is_word_byte)
99 }
100}
101
102impl core::fmt::Debug for Unit {
103 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
104 match *self {
105 Unit::U8(b: u8) => write!(f, "{:?}", DebugByte(b)),
106 Unit::EOI(_) => write!(f, "EOI"),
107 }
108 }
109}
110
111/// A representation of byte oriented equivalence classes.
112///
113/// This is used in a DFA to reduce the size of the transition table. This can
114/// have a particularly large impact not only on the total size of a dense DFA,
115/// but also on compile times.
116#[derive(Clone, Copy)]
117pub struct ByteClasses([u8; 256]);
118
119impl ByteClasses {
120 /// Creates a new set of equivalence classes where all bytes are mapped to
121 /// the same class.
122 pub fn empty() -> ByteClasses {
123 ByteClasses([0; 256])
124 }
125
126 /// Creates a new set of equivalence classes where each byte belongs to
127 /// its own equivalence class.
128 #[cfg(feature = "alloc")]
129 pub fn singletons() -> ByteClasses {
130 let mut classes = ByteClasses::empty();
131 for i in 0..256 {
132 classes.set(i as u8, i as u8);
133 }
134 classes
135 }
136
137 /// Deserializes a byte class map from the given slice. If the slice is of
138 /// insufficient length or otherwise contains an impossible mapping, then
139 /// an error is returned. Upon success, the number of bytes read along with
140 /// the map are returned. The number of bytes read is always a multiple of
141 /// 8.
142 pub fn from_bytes(
143 slice: &[u8],
144 ) -> Result<(ByteClasses, usize), DeserializeError> {
145 if slice.len() < 256 {
146 return Err(DeserializeError::buffer_too_small("byte class map"));
147 }
148 let mut classes = ByteClasses::empty();
149 for (b, &class) in slice[..256].iter().enumerate() {
150 classes.set(b as u8, class);
151 }
152 for b in classes.iter() {
153 if b.as_usize() >= classes.alphabet_len() {
154 return Err(DeserializeError::generic(
155 "found equivalence class greater than alphabet len",
156 ));
157 }
158 }
159 Ok((classes, 256))
160 }
161
162 /// Writes this byte class map to the given byte buffer. if the given
163 /// buffer is too small, then an error is returned. Upon success, the total
164 /// number of bytes written is returned. The number of bytes written is
165 /// guaranteed to be a multiple of 8.
166 pub fn write_to(
167 &self,
168 mut dst: &mut [u8],
169 ) -> Result<usize, SerializeError> {
170 let nwrite = self.write_to_len();
171 if dst.len() < nwrite {
172 return Err(SerializeError::buffer_too_small("byte class map"));
173 }
174 for b in 0..=255 {
175 dst[0] = self.get(b);
176 dst = &mut dst[1..];
177 }
178 Ok(nwrite)
179 }
180
181 /// Returns the total number of bytes written by `write_to`.
182 pub fn write_to_len(&self) -> usize {
183 256
184 }
185
186 /// Set the equivalence class for the given byte.
187 #[inline]
188 pub fn set(&mut self, byte: u8, class: u8) {
189 self.0[byte as usize] = class;
190 }
191
192 /// Get the equivalence class for the given byte.
193 #[inline]
194 pub fn get(&self, byte: u8) -> u8 {
195 self.0[byte as usize]
196 }
197
198 /// Get the equivalence class for the given byte while forcefully
199 /// eliding bounds checks.
200 #[inline]
201 pub unsafe fn get_unchecked(&self, byte: u8) -> u8 {
202 *self.0.get_unchecked(byte as usize)
203 }
204
205 /// Get the equivalence class for the given input unit and return the
206 /// class as a `usize`.
207 #[inline]
208 pub fn get_by_unit(&self, unit: Unit) -> usize {
209 match unit {
210 Unit::U8(b) => usize::try_from(self.get(b)).unwrap(),
211 Unit::EOI(b) => usize::try_from(b).unwrap(),
212 }
213 }
214
215 #[inline]
216 pub fn eoi(&self) -> Unit {
217 Unit::eoi(self.alphabet_len().checked_sub(1).unwrap())
218 }
219
220 /// Return the total number of elements in the alphabet represented by
221 /// these equivalence classes. Equivalently, this returns the total number
222 /// of equivalence classes.
223 #[inline]
224 pub fn alphabet_len(&self) -> usize {
225 // Add one since the number of equivalence classes is one bigger than
226 // the last one. But add another to account for the final EOI class
227 // that isn't explicitly represented.
228 self.0[255] as usize + 1 + 1
229 }
230
231 /// Returns the stride, as a base-2 exponent, required for these
232 /// equivalence classes.
233 ///
234 /// The stride is always the smallest power of 2 that is greater than or
235 /// equal to the alphabet length. This is done so that converting between
236 /// state IDs and indices can be done with shifts alone, which is much
237 /// faster than integer division.
238 #[cfg(feature = "alloc")]
239 pub fn stride2(&self) -> usize {
240 self.alphabet_len().next_power_of_two().trailing_zeros() as usize
241 }
242
243 /// Returns true if and only if every byte in this class maps to its own
244 /// equivalence class. Equivalently, there are 257 equivalence classes
245 /// and each class contains exactly one byte (plus the special EOI class).
246 #[inline]
247 pub fn is_singleton(&self) -> bool {
248 self.alphabet_len() == 257
249 }
250
251 /// Returns an iterator over all equivalence classes in this set.
252 pub fn iter(&self) -> ByteClassIter<'_> {
253 ByteClassIter { classes: self, i: 0 }
254 }
255
256 /// Returns an iterator over a sequence of representative bytes from each
257 /// equivalence class. Namely, this yields exactly N items, where N is
258 /// equivalent to the number of equivalence classes. Each item is an
259 /// arbitrary byte drawn from each equivalence class.
260 ///
261 /// This is useful when one is determinizing an NFA and the NFA's alphabet
262 /// hasn't been converted to equivalence classes yet. Picking an arbitrary
263 /// byte from each equivalence class then permits a full exploration of
264 /// the NFA instead of using every possible byte value.
265 #[cfg(feature = "alloc")]
266 pub fn representatives(&self) -> ByteClassRepresentatives<'_> {
267 ByteClassRepresentatives { classes: self, byte: 0, last_class: None }
268 }
269
270 /// Returns an iterator of the bytes in the given equivalence class.
271 pub fn elements(&self, class: Unit) -> ByteClassElements {
272 ByteClassElements { classes: self, class, byte: 0 }
273 }
274
275 /// Returns an iterator of byte ranges in the given equivalence class.
276 ///
277 /// That is, a sequence of contiguous ranges are returned. Typically, every
278 /// class maps to a single contiguous range.
279 fn element_ranges(&self, class: Unit) -> ByteClassElementRanges {
280 ByteClassElementRanges { elements: self.elements(class), range: None }
281 }
282}
283
284impl core::fmt::Debug for ByteClasses {
285 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
286 if self.is_singleton() {
287 write!(f, "ByteClasses({{singletons}})")
288 } else {
289 write!(f, "ByteClasses(")?;
290 for (i: usize, class: Unit) in self.iter().enumerate() {
291 if i > 0 {
292 write!(f, ", ")?;
293 }
294 write!(f, "{:?} => [", class.as_usize())?;
295 for (start: Unit, end: Unit) in self.element_ranges(class) {
296 if start == end {
297 write!(f, "{:?}", start)?;
298 } else {
299 write!(f, "{:?}-{:?}", start, end)?;
300 }
301 }
302 write!(f, "]")?;
303 }
304 write!(f, ")")
305 }
306 }
307}
308
309/// An iterator over each equivalence class.
310#[derive(Debug)]
311pub struct ByteClassIter<'a> {
312 classes: &'a ByteClasses,
313 i: usize,
314}
315
316impl<'a> Iterator for ByteClassIter<'a> {
317 type Item = Unit;
318
319 fn next(&mut self) -> Option<Unit> {
320 if self.i + 1 == self.classes.alphabet_len() {
321 self.i += 1;
322 Some(self.classes.eoi())
323 } else if self.i < self.classes.alphabet_len() {
324 let class: u8 = self.i as u8;
325 self.i += 1;
326 Some(Unit::u8(byte:class))
327 } else {
328 None
329 }
330 }
331}
332
333/// An iterator over representative bytes from each equivalence class.
334#[cfg(feature = "alloc")]
335#[derive(Debug)]
336pub struct ByteClassRepresentatives<'a> {
337 classes: &'a ByteClasses,
338 byte: usize,
339 last_class: Option<u8>,
340}
341
342#[cfg(feature = "alloc")]
343impl<'a> Iterator for ByteClassRepresentatives<'a> {
344 type Item = Unit;
345
346 fn next(&mut self) -> Option<Unit> {
347 while self.byte < 256 {
348 let byte = self.byte as u8;
349 let class = self.classes.get(byte);
350 self.byte += 1;
351
352 if self.last_class != Some(class) {
353 self.last_class = Some(class);
354 return Some(Unit::u8(byte));
355 }
356 }
357 if self.byte == 256 {
358 self.byte += 1;
359 return Some(self.classes.eoi());
360 }
361 None
362 }
363}
364
365/// An iterator over all elements in an equivalence class.
366#[derive(Debug)]
367pub struct ByteClassElements<'a> {
368 classes: &'a ByteClasses,
369 class: Unit,
370 byte: usize,
371}
372
373impl<'a> Iterator for ByteClassElements<'a> {
374 type Item = Unit;
375
376 fn next(&mut self) -> Option<Unit> {
377 while self.byte < 256 {
378 let byte: u8 = self.byte as u8;
379 self.byte += 1;
380 if self.class.as_u8() == Some(self.classes.get(byte)) {
381 return Some(Unit::u8(byte));
382 }
383 }
384 if self.byte < 257 {
385 self.byte += 1;
386 if self.class.is_eoi() {
387 return Some(Unit::eoi(num_byte_equiv_classes:256));
388 }
389 }
390 None
391 }
392}
393
394/// An iterator over all elements in an equivalence class expressed as a
395/// sequence of contiguous ranges.
396#[derive(Debug)]
397pub struct ByteClassElementRanges<'a> {
398 elements: ByteClassElements<'a>,
399 range: Option<(Unit, Unit)>,
400}
401
402impl<'a> Iterator for ByteClassElementRanges<'a> {
403 type Item = (Unit, Unit);
404
405 fn next(&mut self) -> Option<(Unit, Unit)> {
406 loop {
407 let element = match self.elements.next() {
408 None => return self.range.take(),
409 Some(element) => element,
410 };
411 match self.range.take() {
412 None => {
413 self.range = Some((element, element));
414 }
415 Some((start, end)) => {
416 if end.as_usize() + 1 != element.as_usize()
417 || element.is_eoi()
418 {
419 self.range = Some((element, element));
420 return Some((start, end));
421 }
422 self.range = Some((start, element));
423 }
424 }
425 }
426 }
427}
428
429/// A byte class set keeps track of an *approximation* of equivalence classes
430/// of bytes during NFA construction. That is, every byte in an equivalence
431/// class cannot discriminate between a match and a non-match.
432///
433/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the
434/// same equivalence class because it never matters whether an `a` or a `b` is
435/// seen, and no combination of `a`s and `b`s in the text can discriminate a
436/// match.
437///
438/// Note though that this does not compute the minimal set of equivalence
439/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the
440/// same equivalence class for the same reason that `a` and `b` are in the
441/// same equivalence class in the aforementioned regex. However, in this
442/// implementation, `a` and `c` are put into distinct equivalence classes. The
443/// reason for this is implementation complexity. In the future, we should
444/// endeavor to compute the minimal equivalence classes since they can have a
445/// rather large impact on the size of the DFA. (Doing this will likely require
446/// rethinking how equivalence classes are computed, including changing the
447/// representation here, which is only able to group contiguous bytes into the
448/// same equivalence class.)
449#[derive(Clone, Debug)]
450pub struct ByteClassSet(ByteSet);
451
452impl ByteClassSet {
453 /// Create a new set of byte classes where all bytes are part of the same
454 /// equivalence class.
455 #[cfg(feature = "alloc")]
456 pub fn empty() -> Self {
457 ByteClassSet(ByteSet::empty())
458 }
459
460 /// Indicate the the range of byte given (inclusive) can discriminate a
461 /// match between it and all other bytes outside of the range.
462 #[cfg(feature = "alloc")]
463 pub fn set_range(&mut self, start: u8, end: u8) {
464 debug_assert!(start <= end);
465 if start > 0 {
466 self.0.add(start - 1);
467 }
468 self.0.add(end);
469 }
470
471 /// Add the contiguous ranges in the set given to this byte class set.
472 #[cfg(feature = "alloc")]
473 pub fn add_set(&mut self, set: &ByteSet) {
474 for (start, end) in set.iter_ranges() {
475 self.set_range(start, end);
476 }
477 }
478
479 /// Convert this boolean set to a map that maps all byte values to their
480 /// corresponding equivalence class. The last mapping indicates the largest
481 /// equivalence class identifier (which is never bigger than 255).
482 #[cfg(feature = "alloc")]
483 pub fn byte_classes(&self) -> ByteClasses {
484 let mut classes = ByteClasses::empty();
485 let mut class = 0u8;
486 let mut b = 0u8;
487 loop {
488 classes.set(b, class);
489 if b == 255 {
490 break;
491 }
492 if self.0.contains(b) {
493 class = class.checked_add(1).unwrap();
494 }
495 b = b.checked_add(1).unwrap();
496 }
497 classes
498 }
499}
500
501/// A simple set of bytes that is reasonably cheap to copy and allocation free.
502#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
503pub struct ByteSet {
504 bits: BitSet,
505}
506
507/// The representation of a byte set. Split out so that we can define a
508/// convenient Debug impl for it while keeping "ByteSet" in the output.
509#[derive(Clone, Copy, Default, Eq, PartialEq)]
510struct BitSet([u128; 2]);
511
512impl ByteSet {
513 /// Create an empty set of bytes.
514 #[cfg(feature = "alloc")]
515 pub fn empty() -> ByteSet {
516 ByteSet { bits: BitSet([0; 2]) }
517 }
518
519 /// Add a byte to this set.
520 ///
521 /// If the given byte already belongs to this set, then this is a no-op.
522 #[cfg(feature = "alloc")]
523 pub fn add(&mut self, byte: u8) {
524 let bucket = byte / 128;
525 let bit = byte % 128;
526 self.bits.0[bucket as usize] |= 1 << bit;
527 }
528
529 /// Add an inclusive range of bytes.
530 #[cfg(feature = "alloc")]
531 pub fn add_all(&mut self, start: u8, end: u8) {
532 for b in start..=end {
533 self.add(b);
534 }
535 }
536
537 /// Remove a byte from this set.
538 ///
539 /// If the given byte is not in this set, then this is a no-op.
540 #[cfg(feature = "alloc")]
541 pub fn remove(&mut self, byte: u8) {
542 let bucket = byte / 128;
543 let bit = byte % 128;
544 self.bits.0[bucket as usize] &= !(1 << bit);
545 }
546
547 /// Remove an inclusive range of bytes.
548 #[cfg(feature = "alloc")]
549 pub fn remove_all(&mut self, start: u8, end: u8) {
550 for b in start..=end {
551 self.remove(b);
552 }
553 }
554
555 /// Return true if and only if the given byte is in this set.
556 pub fn contains(&self, byte: u8) -> bool {
557 let bucket = byte / 128;
558 let bit = byte % 128;
559 self.bits.0[bucket as usize] & (1 << bit) > 0
560 }
561
562 /// Return true if and only if the given inclusive range of bytes is in
563 /// this set.
564 #[cfg(feature = "alloc")]
565 pub fn contains_range(&self, start: u8, end: u8) -> bool {
566 (start..=end).all(|b| self.contains(b))
567 }
568
569 /// Returns an iterator over all bytes in this set.
570 #[cfg(feature = "alloc")]
571 pub fn iter(&self) -> ByteSetIter {
572 ByteSetIter { set: self, b: 0 }
573 }
574
575 /// Returns an iterator over all contiguous ranges of bytes in this set.
576 #[cfg(feature = "alloc")]
577 pub fn iter_ranges(&self) -> ByteSetRangeIter {
578 ByteSetRangeIter { set: self, b: 0 }
579 }
580
581 /// Return the number of bytes in this set.
582 #[cfg(feature = "alloc")]
583 pub fn len(&self) -> usize {
584 (self.bits.0[0].count_ones() + self.bits.0[1].count_ones()) as usize
585 }
586
587 /// Return true if and only if this set is empty.
588 #[cfg(feature = "alloc")]
589 pub fn is_empty(&self) -> bool {
590 self.bits.0 == [0, 0]
591 }
592}
593
594impl core::fmt::Debug for BitSet {
595 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
596 let mut fmtd: DebugSet<'_, '_> = f.debug_set();
597 for b: u8 in (0..256).map(|b: i32| b as u8) {
598 if (ByteSet { bits: *self }).contains(byte:b) {
599 fmtd.entry(&b);
600 }
601 }
602 fmtd.finish()
603 }
604}
605
606#[derive(Debug)]
607pub struct ByteSetIter<'a> {
608 set: &'a ByteSet,
609 b: usize,
610}
611
612impl<'a> Iterator for ByteSetIter<'a> {
613 type Item = u8;
614
615 fn next(&mut self) -> Option<u8> {
616 while self.b <= 255 {
617 let b: u8 = self.b as u8;
618 self.b += 1;
619 if self.set.contains(byte:b) {
620 return Some(b);
621 }
622 }
623 None
624 }
625}
626
627#[derive(Debug)]
628pub struct ByteSetRangeIter<'a> {
629 set: &'a ByteSet,
630 b: usize,
631}
632
633impl<'a> Iterator for ByteSetRangeIter<'a> {
634 type Item = (u8, u8);
635
636 fn next(&mut self) -> Option<(u8, u8)> {
637 while self.b <= 255 {
638 let start: u8 = self.b as u8;
639 self.b += 1;
640 if !self.set.contains(byte:start) {
641 continue;
642 }
643
644 let mut end: u8 = start;
645 while self.b <= 255 && self.set.contains(self.b as u8) {
646 end = self.b as u8;
647 self.b += 1;
648 }
649 return Some((start, end));
650 }
651 None
652 }
653}
654
655#[cfg(test)]
656#[cfg(feature = "alloc")]
657mod tests {
658 use alloc::{vec, vec::Vec};
659
660 use super::*;
661
662 #[test]
663 fn byte_classes() {
664 let mut set = ByteClassSet::empty();
665 set.set_range(b'a', b'z');
666
667 let classes = set.byte_classes();
668 assert_eq!(classes.get(0), 0);
669 assert_eq!(classes.get(1), 0);
670 assert_eq!(classes.get(2), 0);
671 assert_eq!(classes.get(b'a' - 1), 0);
672 assert_eq!(classes.get(b'a'), 1);
673 assert_eq!(classes.get(b'm'), 1);
674 assert_eq!(classes.get(b'z'), 1);
675 assert_eq!(classes.get(b'z' + 1), 2);
676 assert_eq!(classes.get(254), 2);
677 assert_eq!(classes.get(255), 2);
678
679 let mut set = ByteClassSet::empty();
680 set.set_range(0, 2);
681 set.set_range(4, 6);
682 let classes = set.byte_classes();
683 assert_eq!(classes.get(0), 0);
684 assert_eq!(classes.get(1), 0);
685 assert_eq!(classes.get(2), 0);
686 assert_eq!(classes.get(3), 1);
687 assert_eq!(classes.get(4), 2);
688 assert_eq!(classes.get(5), 2);
689 assert_eq!(classes.get(6), 2);
690 assert_eq!(classes.get(7), 3);
691 assert_eq!(classes.get(255), 3);
692 }
693
694 #[test]
695 fn full_byte_classes() {
696 let mut set = ByteClassSet::empty();
697 for i in 0..256u16 {
698 set.set_range(i as u8, i as u8);
699 }
700 assert_eq!(set.byte_classes().alphabet_len(), 257);
701 }
702
703 #[test]
704 fn elements_typical() {
705 let mut set = ByteClassSet::empty();
706 set.set_range(b'b', b'd');
707 set.set_range(b'g', b'm');
708 set.set_range(b'z', b'z');
709 let classes = set.byte_classes();
710 // class 0: \x00-a
711 // class 1: b-d
712 // class 2: e-f
713 // class 3: g-m
714 // class 4: n-y
715 // class 5: z-z
716 // class 6: \x7B-\xFF
717 // class 7: EOI
718 assert_eq!(classes.alphabet_len(), 8);
719
720 let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
721 assert_eq!(elements.len(), 98);
722 assert_eq!(elements[0], Unit::u8(b'\x00'));
723 assert_eq!(elements[97], Unit::u8(b'a'));
724
725 let elements = classes.elements(Unit::u8(1)).collect::<Vec<_>>();
726 assert_eq!(
727 elements,
728 vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')],
729 );
730
731 let elements = classes.elements(Unit::u8(2)).collect::<Vec<_>>();
732 assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')],);
733
734 let elements = classes.elements(Unit::u8(3)).collect::<Vec<_>>();
735 assert_eq!(
736 elements,
737 vec![
738 Unit::u8(b'g'),
739 Unit::u8(b'h'),
740 Unit::u8(b'i'),
741 Unit::u8(b'j'),
742 Unit::u8(b'k'),
743 Unit::u8(b'l'),
744 Unit::u8(b'm'),
745 ],
746 );
747
748 let elements = classes.elements(Unit::u8(4)).collect::<Vec<_>>();
749 assert_eq!(elements.len(), 12);
750 assert_eq!(elements[0], Unit::u8(b'n'));
751 assert_eq!(elements[11], Unit::u8(b'y'));
752
753 let elements = classes.elements(Unit::u8(5)).collect::<Vec<_>>();
754 assert_eq!(elements, vec![Unit::u8(b'z')]);
755
756 let elements = classes.elements(Unit::u8(6)).collect::<Vec<_>>();
757 assert_eq!(elements.len(), 133);
758 assert_eq!(elements[0], Unit::u8(b'\x7B'));
759 assert_eq!(elements[132], Unit::u8(b'\xFF'));
760
761 let elements = classes.elements(Unit::eoi(7)).collect::<Vec<_>>();
762 assert_eq!(elements, vec![Unit::eoi(256)]);
763 }
764
765 #[test]
766 fn elements_singletons() {
767 let classes = ByteClasses::singletons();
768 assert_eq!(classes.alphabet_len(), 257);
769
770 let elements = classes.elements(Unit::u8(b'a')).collect::<Vec<_>>();
771 assert_eq!(elements, vec![Unit::u8(b'a')]);
772
773 let elements = classes.elements(Unit::eoi(5)).collect::<Vec<_>>();
774 assert_eq!(elements, vec![Unit::eoi(256)]);
775 }
776
777 #[test]
778 fn elements_empty() {
779 let classes = ByteClasses::empty();
780 assert_eq!(classes.alphabet_len(), 2);
781
782 let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
783 assert_eq!(elements.len(), 256);
784 assert_eq!(elements[0], Unit::u8(b'\x00'));
785 assert_eq!(elements[255], Unit::u8(b'\xFF'));
786
787 let elements = classes.elements(Unit::eoi(1)).collect::<Vec<_>>();
788 assert_eq!(elements, vec![Unit::eoi(256)]);
789 }
790}
791