alphabet.rs source code [crates/regex-automata-0.2.0/src/util/alphabet.rs]

1	use core::convert::TryFrom;
2
3	use crate::util::{
4	bytes::{DeserializeError, SerializeError},
5	DebugByte,
6	};
7
8	/// Unit represents a single unit of input for DFA based regex engines.
9	///
10	/// NOTE:* It is not expected for consumers of this crate to need to use*
11	/// this type unless they are implementing their own DFA. And even then, it's
12	/// not required: implementors may use other techniques to handle input.
13	///
14	/// Typically, a single unit of input for a DFA would be a single byte.
15	/// However, for the DFAs in this crate, matches are delayed by a single byte
16	/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once
17	/// we have consumed the haystack, we must run the DFA through one additional
18	/// transition using an input that indicates the haystack has ended.
19	///
20	/// Since there is no way to represent a sentinel with a `u8` since all
21	/// possible values may* be valid inputs to a DFA, this type explicitly adds*
22	/// room for a sentinel value.
23	///
24	/// The sentinel EOI value is always its own equivalence class and is
25	/// ultimately represented by adding 1 to the maximum equivalence class value.
26	/// So for example, the regex `^[a-z]+$` might be split into the following
27	/// equivalence classes:
28	///
29	/// ```text
30	/// 0 => [\x00-`]
31	/// 1 => [a-z]
32	/// 2 => [{-\xFF]
33	/// 3 => [EOI]
34	/// ```
35	///
36	/// Where EOI is the special sentinel value that is always in its own
37	/// singleton equivalence class.
38	#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
39	pub enum Unit {
40	U8(u8),
41	EOI(u16),
42	}
43
44	impl Unit {
45	/// Create a new input unit from a byte value.
46	///
47	/// All possible byte values are legal. However, when creating an input
48	/// unit for a specific DFA, one should be careful to only construct input
49	/// units that are in that DFA's alphabet. Namely, one way to compact a
50	/// DFA's in-memory representation is to collapse its transitions to a set
51	/// of equivalence classes into a set of all possible byte values. If a
52	/// DFA uses equivalence classes instead of byte values, then the byte
53	/// given here should be the equivalence class.
54	pub fn u8(byte: u8) -> Unit {
55	Unit::U8(byte)
56	}
57
58	pub fn eoi(num_byte_equiv_classes: usize) -> Unit {
59	assert!(
60	num_byte_equiv_classes <= `256`,
61	"max number of byte-based equivalent classes is 256, but got {}",
62	num_byte_equiv_classes,
63	);
64	Unit::EOI(u16::try_from(num_byte_equiv_classes).unwrap())
65	}
66
67	pub fn as_u8(self) -> Option<u8> {
68	match self {
69	Unit::U8(b) => Some(b),
70	Unit::EOI(_) => None,
71	}
72	}
73
74	#[cfg(feature = "alloc")]
75	pub fn as_eoi(self) -> Option<usize> {
76	match self {
77	Unit::U8(_) => None,
78	Unit::EOI(eoi) => Some(eoi as usize),
79	}
80	}
81
82	pub fn as_usize(self) -> usize {
83	match self {
84	Unit::U8(b) => b as usize,
85	Unit::EOI(eoi) => eoi as usize,
86	}
87	}
88
89	pub fn is_eoi(&self) -> bool {
90	match *self {
91	Unit::EOI(_) => `true`,
92	_ => `false`,
93	}
94	}
95
96	#[cfg(feature = "alloc")]
97	pub fn is_word_byte(&self) -> bool {
98	self.as_u8().map_or(`false`, crate::util::is_word_byte)
99	}
100	}
101
102	impl core::fmt::Debug for Unit {
103	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
104	match *self {
105	Unit::U8(b: u8) => write!(f, "{:?}", DebugByte(b)),
106	Unit::EOI(_) => write!(f, "EOI"),
107	}
108	}
109	}
110
111	/// A representation of byte oriented equivalence classes.
112	///
113	/// This is used in a DFA to reduce the size of the transition table. This can
114	/// have a particularly large impact not only on the total size of a dense DFA,
115	/// but also on compile times.
116	#[derive(Clone, Copy)]
117	pub struct ByteClasses([u8; `256`]);
118
119	impl ByteClasses {
120	/// Creates a new set of equivalence classes where all bytes are mapped to
121	/// the same class.
122	pub fn empty() -> ByteClasses {
123	ByteClasses([`0`; `256`])
124	}
125
126	/// Creates a new set of equivalence classes where each byte belongs to
127	/// its own equivalence class.
128	#[cfg(feature = "alloc")]
129	pub fn singletons() -> ByteClasses {
130	let mut classes = ByteClasses::empty();
131	for i in `0`..`256` {
132	classes.set(i as u8, i as u8);
133	}
134	classes
135	}
136
137	/// Deserializes a byte class map from the given slice. If the slice is of
138	/// insufficient length or otherwise contains an impossible mapping, then
139	/// an error is returned. Upon success, the number of bytes read along with
140	/// the map are returned. The number of bytes read is always a multiple of
141	/// 8.
142	pub fn from_bytes(
143	slice: &[u8],
144	) -> Result<(ByteClasses, usize), DeserializeError> {
145	if slice.len() < `256` {
146	return Err(DeserializeError::buffer_too_small("byte class map"));
147	}
148	let mut classes = ByteClasses::empty();
149	for (b, &class) in slice[..`256`].iter().enumerate() {
150	classes.set(b as u8, class);
151	}
152	for b in classes.iter() {
153	if b.as_usize() >= classes.alphabet_len() {
154	return Err(DeserializeError::generic(
155	"found equivalence class greater than alphabet len",
156	));
157	}
158	}
159	Ok((classes, `256`))
160	}
161
162	/// Writes this byte class map to the given byte buffer. if the given
163	/// buffer is too small, then an error is returned. Upon success, the total
164	/// number of bytes written is returned. The number of bytes written is
165	/// guaranteed to be a multiple of 8.
166	pub fn write_to(
167	&self,
168	mut dst: &mut [u8],
169	) -> Result<usize, SerializeError> {
170	let nwrite = self.write_to_len();
171	if dst.len() < nwrite {
172	return Err(SerializeError::buffer_too_small("byte class map"));
173	}
174	for b in `0`..=`255` {
175	dst[`0`] = self.get(b);
176	dst = &mut dst[`1`..];
177	}
178	Ok(nwrite)
179	}
180
181	/// Returns the total number of bytes written by `write_to`.
182	pub fn write_to_len(&self) -> usize {
183	`256`
184	}
185
186	/// Set the equivalence class for the given byte.
187	#[inline]
188	pub fn set(&mut self, byte: u8, class: u8) {
189	self.0[byte as usize] = class;
190	}
191
192	/// Get the equivalence class for the given byte.
193	#[inline]
194	pub fn get(&self, byte: u8) -> u8 {
195	self.0[byte as usize]
196	}
197
198	/// Get the equivalence class for the given byte while forcefully
199	/// eliding bounds checks.
200	#[inline]
201	pub unsafe fn get_unchecked(&self, byte: u8) -> u8 {
202	self.0.get_unchecked(byte as usize*)
203	}
204
205	/// Get the equivalence class for the given input unit and return the
206	/// class as a `usize`.
207	#[inline]
208	pub fn get_by_unit(&self, unit: Unit) -> usize {
209	match unit {
210	Unit::U8(b) => usize::try_from(self.get(b)).unwrap(),
211	Unit::EOI(b) => usize::try_from(b).unwrap(),
212	}
213	}
214
215	#[inline]
216	pub fn eoi(&self) -> Unit {
217	Unit::eoi(self.alphabet_len().checked_sub(`1`).unwrap())
218	}
219
220	/// Return the total number of elements in the alphabet represented by
221	/// these equivalence classes. Equivalently, this returns the total number
222	/// of equivalence classes.
223	#[inline]
224	pub fn alphabet_len(&self) -> usize {
225	// Add one since the number of equivalence classes is one bigger than
226	// the last one. But add another to account for the final EOI class
227	// that isn't explicitly represented.
228	self.0[`255`] as usize + `1` + `1`
229	}
230
231	/// Returns the stride, as a base-2 exponent, required for these
232	/// equivalence classes.
233	///
234	/// The stride is always the smallest power of 2 that is greater than or
235	/// equal to the alphabet length. This is done so that converting between
236	/// state IDs and indices can be done with shifts alone, which is much
237	/// faster than integer division.
238	#[cfg(feature = "alloc")]
239	pub fn stride2(&self) -> usize {
240	self.alphabet_len().next_power_of_two().trailing_zeros() as usize
241	}
242
243	/// Returns true if and only if every byte in this class maps to its own
244	/// equivalence class. Equivalently, there are 257 equivalence classes
245	/// and each class contains exactly one byte (plus the special EOI class).
246	#[inline]
247	pub fn is_singleton(&self) -> bool {
248	self.alphabet_len() == `257`
249	}
250
251	/// Returns an iterator over all equivalence classes in this set.
252	pub fn iter(&self) -> ByteClassIter<'_> {
253	ByteClassIter { classes: self, i: `0` }
254	}
255
256	/// Returns an iterator over a sequence of representative bytes from each
257	/// equivalence class. Namely, this yields exactly N items, where N is
258	/// equivalent to the number of equivalence classes. Each item is an
259	/// arbitrary byte drawn from each equivalence class.
260	///
261	/// This is useful when one is determinizing an NFA and the NFA's alphabet
262	/// hasn't been converted to equivalence classes yet. Picking an arbitrary
263	/// byte from each equivalence class then permits a full exploration of
264	/// the NFA instead of using every possible byte value.
265	#[cfg(feature = "alloc")]
266	pub fn representatives(&self) -> ByteClassRepresentatives<'_> {
267	ByteClassRepresentatives { classes: self, byte: `0`, last_class: None }
268	}
269
270	/// Returns an iterator of the bytes in the given equivalence class.
271	pub fn elements(&self, class: Unit) -> ByteClassElements {
272	ByteClassElements { classes: self, class, byte: `0` }
273	}
274
275	/// Returns an iterator of byte ranges in the given equivalence class.
276	///
277	/// That is, a sequence of contiguous ranges are returned. Typically, every
278	/// class maps to a single contiguous range.
279	fn element_ranges(&self, class: Unit) -> ByteClassElementRanges {
280	ByteClassElementRanges { elements: self.elements(class), range: None }
281	}
282	}
283
284	impl core::fmt::Debug for ByteClasses {
285	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
286	if self.is_singleton() {
287	write!(f, "ByteClasses(`{{`singletons`}}`)")
288	} else {
289	write!(f, "ByteClasses(")?;
290	for (i: usize, class: Unit) in self.iter().enumerate() {
291	if i > `0` {
292	write!(f, ", ")?;
293	}
294	write!(f, "{:?} => [", class.as_usize())?;
295	for (start: Unit, end: Unit) in self.element_ranges(class) {
296	if start == end {
297	write!(f, "{:?}", start)?;
298	} else {
299	write!(f, "{:?}-{:?}", start, end)?;
300	}
301	}
302	write!(f, "]")?;
303	}
304	write!(f, ")")
305	}
306	}
307	}
308
309	/// An iterator over each equivalence class.
310	#[derive(Debug)]
311	pub struct ByteClassIter<'a> {
312	classes: &'a ByteClasses,
313	i: usize,
314	}
315
316	impl<'a> Iterator for ByteClassIter<'a> {
317	type Item = Unit;
318
319	fn next(&mut self) -> Option<Unit> {
320	if self.i + `1` == self.classes.alphabet_len() {
321	self.i += `1`;
322	Some(self.classes.eoi())
323	} else if self.i < self.classes.alphabet_len() {
324	let class: u8 = self.i as u8;
325	self.i += `1`;
326	Some(Unit::u8(byte:class))
327	} else {
328	None
329	}
330	}
331	}
332
333	/// An iterator over representative bytes from each equivalence class.
334	#[cfg(feature = "alloc")]
335	#[derive(Debug)]
336	pub struct ByteClassRepresentatives<'a> {
337	classes: &'a ByteClasses,
338	byte: usize,
339	last_class: Option<u8>,
340	}
341
342	#[cfg(feature = "alloc")]
343	impl<'a> Iterator for ByteClassRepresentatives<'a> {
344	type Item = Unit;
345
346	fn next(&mut self) -> Option<Unit> {
347	while self.byte < `256` {
348	let byte = self.byte as u8;
349	let class = self.classes.get(byte);
350	self.byte += `1`;
351
352	if self.last_class != Some(class) {
353	self.last_class = Some(class);
354	return Some(Unit::u8(byte));
355	}
356	}
357	if self.byte == `256` {
358	self.byte += `1`;
359	return Some(self.classes.eoi());
360	}
361	None
362	}
363	}
364
365	/// An iterator over all elements in an equivalence class.
366	#[derive(Debug)]
367	pub struct ByteClassElements<'a> {
368	classes: &'a ByteClasses,
369	class: Unit,
370	byte: usize,
371	}
372
373	impl<'a> Iterator for ByteClassElements<'a> {
374	type Item = Unit;
375
376	fn next(&mut self) -> Option<Unit> {
377	while self.byte < `256` {
378	let byte: u8 = self.byte as u8;
379	self.byte += `1`;
380	if self.class.as_u8() == Some(self.classes.get(byte)) {
381	return Some(Unit::u8(byte));
382	}
383	}
384	if self.byte < `257` {
385	self.byte += `1`;
386	if self.class.is_eoi() {
387	return Some(Unit::eoi(num_byte_equiv_classes:`256`));
388	}
389	}
390	None
391	}
392	}
393
394	/// An iterator over all elements in an equivalence class expressed as a
395	/// sequence of contiguous ranges.
396	#[derive(Debug)]
397	pub struct ByteClassElementRanges<'a> {
398	elements: ByteClassElements<'a>,
399	range: Option<(Unit, Unit)>,
400	}
401
402	impl<'a> Iterator for ByteClassElementRanges<'a> {
403	type Item = (Unit, Unit);
404
405	fn next(&mut self) -> Option<(Unit, Unit)> {
406	loop {
407	let element = match self.elements.next() {
408	None => return self.range.take(),
409	Some(element) => element,
410	};
411	match self.range.take() {
412	None => {
413	self.range = Some((element, element));
414	}
415	Some((start, end)) => {
416	if end.as_usize() + `1` != element.as_usize()
417	\|\| element.is_eoi()
418	{
419	self.range = Some((element, element));
420	return Some((start, end));
421	}
422	self.range = Some((start, element));
423	}
424	}
425	}
426	}
427	}
428
429	/// A byte class set keeps track of an approximation* of equivalence classes*
430	/// of bytes during NFA construction. That is, every byte in an equivalence
431	/// class cannot discriminate between a match and a non-match.
432	///
433	/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the
434	/// same equivalence class because it never matters whether an `a` or a `b` is
435	/// seen, and no combination of `a`s and `b`s in the text can discriminate a
436	/// match.
437	///
438	/// Note though that this does not compute the minimal set of equivalence
439	/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the
440	/// same equivalence class for the same reason that `a` and `b` are in the
441	/// same equivalence class in the aforementioned regex. However, in this
442	/// implementation, `a` and `c` are put into distinct equivalence classes. The
443	/// reason for this is implementation complexity. In the future, we should
444	/// endeavor to compute the minimal equivalence classes since they can have a
445	/// rather large impact on the size of the DFA. (Doing this will likely require
446	/// rethinking how equivalence classes are computed, including changing the
447	/// representation here, which is only able to group contiguous bytes into the
448	/// same equivalence class.)
449	#[derive(Clone, Debug)]
450	pub struct ByteClassSet(ByteSet);
451
452	impl ByteClassSet {
453	/// Create a new set of byte classes where all bytes are part of the same
454	/// equivalence class.
455	#[cfg(feature = "alloc")]
456	pub fn empty() -> Self {
457	ByteClassSet(ByteSet::empty())
458	}
459
460	/// Indicate the the range of byte given (inclusive) can discriminate a
461	/// match between it and all other bytes outside of the range.
462	#[cfg(feature = "alloc")]
463	pub fn set_range(&mut self, start: u8, end: u8) {
464	debug_assert!(start <= end);
465	if start > `0` {
466	self.0.add(start - `1`);
467	}
468	self.0.add(end);
469	}
470
471	/// Add the contiguous ranges in the set given to this byte class set.
472	#[cfg(feature = "alloc")]
473	pub fn add_set(&mut self, set: &ByteSet) {
474	for (start, end) in set.iter_ranges() {
475	self.set_range(start, end);
476	}
477	}
478
479	/// Convert this boolean set to a map that maps all byte values to their
480	/// corresponding equivalence class. The last mapping indicates the largest
481	/// equivalence class identifier (which is never bigger than 255).
482	#[cfg(feature = "alloc")]
483	pub fn byte_classes(&self) -> ByteClasses {
484	let mut classes = ByteClasses::empty();
485	let mut class = `0u8`;
486	let mut b = `0u8`;
487	loop {
488	classes.set(b, class);
489	if b == `255` {
490	break;
491	}
492	if self.0.contains(b) {
493	class = class.checked_add(`1`).unwrap();
494	}
495	b = b.checked_add(`1`).unwrap();
496	}
497	classes
498	}
499	}
500
501	/// A simple set of bytes that is reasonably cheap to copy and allocation free.
502	#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
503	pub struct ByteSet {
504	bits: BitSet,
505	}
506
507	/// The representation of a byte set. Split out so that we can define a
508	/// convenient Debug impl for it while keeping "ByteSet" in the output.
509	#[derive(Clone, Copy, Default, Eq, PartialEq)]
510	struct BitSet([u128; `2`]);
511
512	impl ByteSet {
513	/// Create an empty set of bytes.
514	#[cfg(feature = "alloc")]
515	pub fn empty() -> ByteSet {
516	ByteSet { bits: BitSet([`0`; `2`]) }
517	}
518
519	/// Add a byte to this set.
520	///
521	/// If the given byte already belongs to this set, then this is a no-op.
522	#[cfg(feature = "alloc")]
523	pub fn add(&mut self, byte: u8) {
524	let bucket = byte / `128`;
525	let bit = byte % `128`;
526	self.bits.0[bucket as usize] \|= `1` << bit;
527	}
528
529	/// Add an inclusive range of bytes.
530	#[cfg(feature = "alloc")]
531	pub fn add_all(&mut self, start: u8, end: u8) {
532	for b in start..=end {
533	self.add(b);
534	}
535	}
536
537	/// Remove a byte from this set.
538	///
539	/// If the given byte is not in this set, then this is a no-op.
540	#[cfg(feature = "alloc")]
541	pub fn remove(&mut self, byte: u8) {
542	let bucket = byte / `128`;
543	let bit = byte % `128`;
544	self.bits.0[bucket as usize] &= !(`1` << bit);
545	}
546
547	/// Remove an inclusive range of bytes.
548	#[cfg(feature = "alloc")]
549	pub fn remove_all(&mut self, start: u8, end: u8) {
550	for b in start..=end {
551	self.remove(b);
552	}
553	}
554
555	/// Return true if and only if the given byte is in this set.
556	pub fn contains(&self, byte: u8) -> bool {
557	let bucket = byte / `128`;
558	let bit = byte % `128`;
559	self.bits.0[bucket as usize] & (`1` << bit) > `0`
560	}
561
562	/// Return true if and only if the given inclusive range of bytes is in
563	/// this set.
564	#[cfg(feature = "alloc")]
565	pub fn contains_range(&self, start: u8, end: u8) -> bool {
566	(start..=end).all(\|b\| self.contains(b))
567	}
568
569	/// Returns an iterator over all bytes in this set.
570	#[cfg(feature = "alloc")]
571	pub fn iter(&self) -> ByteSetIter {
572	ByteSetIter { set: self, b: `0` }
573	}
574
575	/// Returns an iterator over all contiguous ranges of bytes in this set.
576	#[cfg(feature = "alloc")]
577	pub fn iter_ranges(&self) -> ByteSetRangeIter {
578	ByteSetRangeIter { set: self, b: `0` }
579	}
580
581	/// Return the number of bytes in this set.
582	#[cfg(feature = "alloc")]
583	pub fn len(&self) -> usize {
584	(self.bits.0[`0`].count_ones() + self.bits.0[`1`].count_ones()) as usize
585	}
586
587	/// Return true if and only if this set is empty.
588	#[cfg(feature = "alloc")]
589	pub fn is_empty(&self) -> bool {
590	self.bits.0 == [`0`, `0`]
591	}
592	}
593
594	impl core::fmt::Debug for BitSet {
595	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
596	let mut fmtd: DebugSet<'_, '_> = f.debug_set();
597	for b: u8 in (`0`..`256`).map(\|b: i32\| b as u8) {
598	if (ByteSet { bits: *self }).contains(byte:b) {
599	fmtd.entry(&b);
600	}
601	}
602	fmtd.finish()
603	}
604	}
605
606	#[derive(Debug)]
607	pub struct ByteSetIter<'a> {
608	set: &'a ByteSet,
609	b: usize,
610	}
611
612	impl<'a> Iterator for ByteSetIter<'a> {
613	type Item = u8;
614
615	fn next(&mut self) -> Option<u8> {
616	while self.b <= `255` {
617	let b: u8 = self.b as u8;
618	self.b += `1`;
619	if self.set.contains(byte:b) {
620	return Some(b);
621	}
622	}
623	None
624	}
625	}
626
627	#[derive(Debug)]
628	pub struct ByteSetRangeIter<'a> {
629	set: &'a ByteSet,
630	b: usize,
631	}
632
633	impl<'a> Iterator for ByteSetRangeIter<'a> {
634	type Item = (u8, u8);
635
636	fn next(&mut self) -> Option<(u8, u8)> {
637	while self.b <= `255` {
638	let start: u8 = self.b as u8;
639	self.b += `1`;
640	if !self.set.contains(byte:start) {
641	continue;
642	}
643
644	let mut end: u8 = start;
645	while self.b <= `255` && self.set.contains(self.b as u8) {
646	end = self.b as u8;
647	self.b += `1`;
648	}
649	return Some((start, end));
650	}
651	None
652	}
653	}
654
655	#[cfg(test)]
656	#[cfg(feature = "alloc")]
657	mod tests {
658	use alloc::{vec, vec::Vec};
659
660	use super::*;
661
662	#[test]
663	fn byte_classes() {
664	let mut set = ByteClassSet::empty();
665	set.set_range(b'a', b'z');
666
667	let classes = set.byte_classes();
668	assert_eq!(classes.get(`0`), `0`);
669	assert_eq!(classes.get(`1`), `0`);
670	assert_eq!(classes.get(`2`), `0`);
671	assert_eq!(classes.get(b'a' - `1`), `0`);
672	assert_eq!(classes.get(b'a'), `1`);
673	assert_eq!(classes.get(b'm'), `1`);
674	assert_eq!(classes.get(b'z'), `1`);
675	assert_eq!(classes.get(b'z' + `1`), `2`);
676	assert_eq!(classes.get(`254`), `2`);
677	assert_eq!(classes.get(`255`), `2`);
678
679	let mut set = ByteClassSet::empty();
680	set.set_range(`0`, `2`);
681	set.set_range(`4`, `6`);
682	let classes = set.byte_classes();
683	assert_eq!(classes.get(`0`), `0`);
684	assert_eq!(classes.get(`1`), `0`);
685	assert_eq!(classes.get(`2`), `0`);
686	assert_eq!(classes.get(`3`), `1`);
687	assert_eq!(classes.get(`4`), `2`);
688	assert_eq!(classes.get(`5`), `2`);
689	assert_eq!(classes.get(`6`), `2`);
690	assert_eq!(classes.get(`7`), `3`);
691	assert_eq!(classes.get(`255`), `3`);
692	}
693
694	#[test]
695	fn full_byte_classes() {
696	let mut set = ByteClassSet::empty();
697	for i in `0`..`256u16` {
698	set.set_range(i as u8, i as u8);
699	}
700	assert_eq!(set.byte_classes().alphabet_len(), `257`);
701	}
702
703	#[test]
704	fn elements_typical() {
705	let mut set = ByteClassSet::empty();
706	set.set_range(b'b', b'd');
707	set.set_range(b'g', b'm');
708	set.set_range(b'z', b'z');
709	let classes = set.byte_classes();
710	// class 0: \x00-a
711	// class 1: b-d
712	// class 2: e-f
713	// class 3: g-m
714	// class 4: n-y
715	// class 5: z-z
716	// class 6: \x7B-\xFF
717	// class 7: EOI
718	assert_eq!(classes.alphabet_len(), `8`);
719
720	let elements = classes.elements(Unit::u8(`0`)).collect::<Vec<_>>();
721	assert_eq!(elements.len(), `98`);
722	assert_eq!(elements[`0`], Unit::u8(b'`\x00`'));
723	assert_eq!(elements[`97`], Unit::u8(b'a'));
724
725	let elements = classes.elements(Unit::u8(`1`)).collect::<Vec<_>>();
726	assert_eq!(
727	elements,
728	vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')],
729	);
730
731	let elements = classes.elements(Unit::u8(`2`)).collect::<Vec<_>>();
732	assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')],);
733
734	let elements = classes.elements(Unit::u8(`3`)).collect::<Vec<_>>();
735	assert_eq!(
736	elements,
737	vec![
738	Unit::u8(b'g'),
739	Unit::u8(b'h'),
740	Unit::u8(b'i'),
741	Unit::u8(b'j'),
742	Unit::u8(b'k'),
743	Unit::u8(b'l'),
744	Unit::u8(b'm'),
745	],
746	);
747
748	let elements = classes.elements(Unit::u8(`4`)).collect::<Vec<_>>();
749	assert_eq!(elements.len(), `12`);
750	assert_eq!(elements[`0`], Unit::u8(b'n'));
751	assert_eq!(elements[`11`], Unit::u8(b'y'));
752
753	let elements = classes.elements(Unit::u8(`5`)).collect::<Vec<_>>();
754	assert_eq!(elements, vec![Unit::u8(b'z')]);
755
756	let elements = classes.elements(Unit::u8(`6`)).collect::<Vec<_>>();
757	assert_eq!(elements.len(), `133`);
758	assert_eq!(elements[`0`], Unit::u8(b'`\x7B`'));
759	assert_eq!(elements[`132`], Unit::u8(b'`\xFF`'));
760
761	let elements = classes.elements(Unit::eoi(`7`)).collect::<Vec<_>>();
762	assert_eq!(elements, vec![Unit::eoi(`256`)]);
763	}
764
765	#[test]
766	fn elements_singletons() {
767	let classes = ByteClasses::singletons();
768	assert_eq!(classes.alphabet_len(), `257`);
769
770	let elements = classes.elements(Unit::u8(b'a')).collect::<Vec<_>>();
771	assert_eq!(elements, vec![Unit::u8(b'a')]);
772
773	let elements = classes.elements(Unit::eoi(`5`)).collect::<Vec<_>>();
774	assert_eq!(elements, vec![Unit::eoi(`256`)]);
775	}
776
777	#[test]
778	fn elements_empty() {
779	let classes = ByteClasses::empty();
780	assert_eq!(classes.alphabet_len(), `2`);
781
782	let elements = classes.elements(Unit::u8(`0`)).collect::<Vec<_>>();
783	assert_eq!(elements.len(), `256`);
784	assert_eq!(elements[`0`], Unit::u8(b'`\x00`'));
785	assert_eq!(elements[`255`], Unit::u8(b'`\xFF`'));
786
787	let elements = classes.elements(Unit::eoi(`1`)).collect::<Vec<_>>();
788	assert_eq!(elements, vec![Unit::eoi(`256`)]);
789	}
790	}
791