sparse.rs source code [crates/regex-automata-0.1.10/src/sparse.rs]

1	#[cfg(feature = "std")]
2	use core::fmt;
3	#[cfg(feature = "std")]
4	use core::iter;
5	use core::marker::PhantomData;
6	use core::mem::size_of;
7	#[cfg(feature = "std")]
8	use std::collections::HashMap;
9
10	#[cfg(feature = "std")]
11	use byteorder::{BigEndian, LittleEndian};
12	use byteorder::{ByteOrder, NativeEndian};
13
14	use classes::ByteClasses;
15	use dense;
16	use dfa::DFA;
17	#[cfg(feature = "std")]
18	use error::{Error, Result};
19	#[cfg(feature = "std")]
20	use state_id::{dead_id, usize_to_state_id, write_state_id_bytes, StateID};
21	#[cfg(not(feature = "std"))]
22	use state_id::{dead_id, StateID};
23
24	/// A sparse table-based deterministic finite automaton (DFA).
25	///
26	/// In contrast to a [dense DFA](enum.DenseDFA.html), a sparse DFA uses a
27	/// more space efficient representation for its transition table. Consequently,
28	/// sparse DFAs can use much less memory than dense DFAs, but this comes at a
29	/// price. In particular, reading the more space efficient transitions takes
30	/// more work, and consequently, searching using a sparse DFA is typically
31	/// slower than a dense DFA.
32	///
33	/// A sparse DFA can be built using the default configuration via the
34	/// [`SparseDFA::new`](enum.SparseDFA.html#method.new) constructor. Otherwise,
35	/// one can configure various aspects of a dense DFA via
36	/// [`dense::Builder`](dense/struct.Builder.html), and then convert a dense
37	/// DFA to a sparse DFA using
38	/// [`DenseDFA::to_sparse`](enum.DenseDFA.html#method.to_sparse).
39	///
40	/// In general, a sparse DFA supports all the same operations as a dense DFA.
41	///
42	/// Making the choice between a dense and sparse DFA depends on your specific
43	/// work load. If you can sacrifice a bit of search time performance, then a
44	/// sparse DFA might be the best choice. In particular, while sparse DFAs are
45	/// probably always slower than dense DFAs, you may find that they are easily
46	/// fast enough for your purposes!
47	///
48	/// # State size
49	///
50	/// A `SparseDFA` has two type parameters, `T` and `S`. `T` corresponds to
51	/// the type of the DFA's transition table while `S` corresponds to the
52	/// representation used for the DFA's state identifiers as described by the
53	/// [`StateID`](trait.StateID.html) trait. This type parameter is typically
54	/// `usize`, but other valid choices provided by this crate include `u8`,
55	/// `u16`, `u32` and `u64`. The primary reason for choosing a different state
56	/// identifier representation than the default is to reduce the amount of
57	/// memory used by a DFA. Note though, that if the chosen representation cannot
58	/// accommodate the size of your DFA, then building the DFA will fail and
59	/// return an error.
60	///
61	/// While the reduction in heap memory used by a DFA is one reason for choosing
62	/// a smaller state identifier representation, another possible reason is for
63	/// decreasing the serialization size of a DFA, as returned by
64	/// [`to_bytes_little_endian`](enum.SparseDFA.html#method.to_bytes_little_endian),
65	/// [`to_bytes_big_endian`](enum.SparseDFA.html#method.to_bytes_big_endian)
66	/// or
67	/// [`to_bytes_native_endian`](enum.DenseDFA.html#method.to_bytes_native_endian).
68	///
69	/// The type of the transition table is typically either `Vec<u8>` or `&[u8]`,
70	/// depending on where the transition table is stored. Note that this is
71	/// different than a dense DFA, whose transition table is typically
72	/// `Vec<S>` or `&[S]`. The reason for this is that a sparse DFA always reads
73	/// its transition table from raw bytes because the table is compactly packed.
74	///
75	/// # Variants
76	///
77	/// This DFA is defined as a non-exhaustive enumeration of different types of
78	/// dense DFAs. All of the variants use the same internal representation
79	/// for the transition table, but they vary in how the transition table is
80	/// read. A DFA's specific variant depends on the configuration options set via
81	/// [`dense::Builder`](dense/struct.Builder.html). The default variant is
82	/// `ByteClass`.
83	///
84	/// # The `DFA` trait
85	///
86	/// This type implements the [`DFA`](trait.DFA.html) trait, which means it
87	/// can be used for searching. For example:
88	///
89	/// ```
90	/// use regex_automata::{DFA, SparseDFA};
91	///
92	/// # fn example() -> Result<(), regex_automata::Error> {
93	/// let dfa = SparseDFA::new("foo[0-9]+")?;
94	/// assert_eq!(Some(`8`), dfa.find(b"foo12345"));
95	/// # Ok(()) }; example().unwrap()
96	/// ```
97	///
98	/// The `DFA` trait also provides an assortment of other lower level methods
99	/// for DFAs, such as `start_state` and `next_state`. While these are correctly
100	/// implemented, it is an anti-pattern to use them in performance sensitive
101	/// code on the `SparseDFA` type directly. Namely, each implementation requires
102	/// a branch to determine which type of sparse DFA is being used. Instead,
103	/// this branch should be pushed up a layer in the code since walking the
104	/// transitions of a DFA is usually a hot path. If you do need to use these
105	/// lower level methods in performance critical code, then you should match on
106	/// the variants of this DFA and use each variant's implementation of the `DFA`
107	/// trait directly.
108	#[derive(Clone, Debug)]
109	pub enum SparseDFA<T: AsRef<[u8]>, S: StateID = usize> {
110	/// A standard DFA that does not use byte classes.
111	Standard(Standard<T, S>),
112	/// A DFA that shrinks its alphabet to a set of equivalence classes instead
113	/// of using all possible byte values. Any two bytes belong to the same
114	/// equivalence class if and only if they can be used interchangeably
115	/// anywhere in the DFA while never discriminating between a match and a
116	/// non-match.
117	///
118	/// Unlike dense DFAs, sparse DFAs do not tend to benefit nearly as much
119	/// from using byte classes. In some cases, using byte classes can even
120	/// marginally increase the size of a sparse DFA's transition table. The
121	/// reason for this is that a sparse DFA already compacts each state's
122	/// transitions separate from whether byte classes are used.
123	ByteClass(ByteClass<T, S>),
124	/// Hints that destructuring should not be exhaustive.
125	///
126	/// This enum may grow additional variants, so this makes sure clients
127	/// don't count on exhaustive matching. (Otherwise, adding a new variant
128	/// could break existing code.)
129	#[doc(hidden)]
130	__Nonexhaustive,
131	}
132
#[cfg(feature = "std")]
impl SparseDFA<Vec<u8>, usize> {
    /// Compile the given regular expression into a sparse DFA using the
    /// default configuration.
    ///
    /// The default configuration uses `usize` for state IDs and reduces the
    /// alphabet size by splitting bytes into equivalence classes. The
    /// resulting DFA is *not* minimized.
    ///
    /// For a non-default configuration, set your options on a
    /// [`dense::Builder`](dense/struct.Builder.html)
    /// and then call
    /// [`DenseDFA::to_sparse`](enum.DenseDFA.html#method.to_sparse)
    /// on the dense DFA it builds.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{DFA, SparseDFA};
    ///
    /// # fn example() -> Result<(), regex_automata::Error> {
    /// let dfa = SparseDFA::new("foo[0-9]+bar")?;
    /// assert_eq!(Some(11), dfa.find(b"foo12345bar"));
    /// # Ok(()) }; example().unwrap()
    /// ```
    pub fn new(pattern: &str) -> Result<SparseDFA<Vec<u8>, usize>> {
        // Build the dense DFA first; any build error is returned as is.
        let dense = dense::Builder::new().build(pattern)?;
        dense.to_sparse()
    }
}
164
#[cfg(feature = "std")]
impl<S: StateID> SparseDFA<Vec<u8>, S> {
    /// Build an empty sparse DFA, i.e. one that never matches any input.
    ///
    /// # Example
    ///
    /// Callers must provide a type hint that fixes the state identifier
    /// representation of the empty DFA.
    ///
    /// ```
    /// use regex_automata::{DFA, SparseDFA};
    ///
    /// # fn example() -> Result<(), regex_automata::Error> {
    /// let dfa: SparseDFA<Vec<u8>, usize> = SparseDFA::empty();
    /// assert_eq!(None, dfa.find(b""));
    /// assert_eq!(None, dfa.find(b"foo"));
    /// # Ok(()) }; example().unwrap()
    /// ```
    pub fn empty() -> SparseDFA<Vec<u8>, S> {
        let dense: dense::DenseDFA<Vec<S>, S> = dense::DenseDFA::empty();
        dense.to_sparse().unwrap()
    }

    /// Convert the given dense DFA representation into a sparse DFA that
    /// uses `A` for its state identifiers, returning whatever error the
    /// underlying conversion reports.
    pub(crate) fn from_dense_sized<T: AsRef<[S]>, A: StateID>(
        dfa: &dense::Repr<T, S>,
    ) -> Result<SparseDFA<Vec<u8>, A>> {
        let repr: Repr<Vec<u8>, A> = Repr::from_dense_sized(dfa)?;
        Ok(repr.into_sparse_dfa())
    }
}
193
194	impl<T: AsRef<[u8]>, S: StateID> SparseDFA<T, S> {
195	/// Cheaply return a borrowed version of this sparse DFA. Specifically, the
196	/// DFA returned always uses `&[u8]` for its transition table while keeping
197	/// the same state identifier representation.
198	pub fn as_ref<'a>(&'a self) -> SparseDFA<&'a [u8], S> {
199	match *self {
200	SparseDFA::Standard(Standard(ref r)) => {
201	SparseDFA::Standard(Standard(r.as_ref()))
202	}
203	SparseDFA::ByteClass(ByteClass(ref r)) => {
204	SparseDFA::ByteClass(ByteClass(r.as_ref()))
205	}
206	SparseDFA::__Nonexhaustive => unreachable!(),
207	}
208	}
209
210	/// Return an owned version of this sparse DFA. Specifically, the DFA
211	/// returned always uses `Vec<u8>` for its transition table while keeping
212	/// the same state identifier representation.
213	///
214	/// Effectively, this returns a sparse DFA whose transition table lives
215	/// on the heap.
216	#[cfg(feature = "std")]
217	pub fn to_owned(&self) -> SparseDFA<Vec<u8>, S> {
218	match *self {
219	SparseDFA::Standard(Standard(ref r)) => {
220	SparseDFA::Standard(Standard(r.to_owned()))
221	}
222	SparseDFA::ByteClass(ByteClass(ref r)) => {
223	SparseDFA::ByteClass(ByteClass(r.to_owned()))
224	}
225	SparseDFA::__Nonexhaustive => unreachable!(),
226	}
227	}
228
229	/// Returns the memory usage, in bytes, of this DFA.
230	///
231	/// The memory usage is computed based on the number of bytes used to
232	/// represent this DFA's transition table. This typically corresponds to
233	/// heap memory usage.
234	///
235	/// This does not* include the stack size used up by this DFA. To*
236	/// compute that, used `std::mem::size_of::<SparseDFA>()`.
237	pub fn memory_usage(&self) -> usize {
238	self.repr().memory_usage()
239	}
240
241	fn repr(&self) -> &Repr<T, S> {
242	match *self {
243	SparseDFA::Standard(ref r) => &r.0,
244	SparseDFA::ByteClass(ref r) => &r.0,
245	SparseDFA::__Nonexhaustive => unreachable!(),
246	}
247	}
248	}
249
/// Routines for converting a sparse DFA to other representations, such as
/// smaller state identifiers or raw bytes suitable for persistent storage.
#[cfg(feature = "std")]
impl<T: AsRef<[u8]>, S: StateID> SparseDFA<T, S> {
    /// Create a new sparse DFA with the same match semantics as this one,
    /// but attempt to use `u8` for the representation of state identifiers.
    /// Returns an error if `u8` is insufficient to represent all state
    /// identifiers in this DFA.
    ///
    /// This is a convenience routine for `to_sized::<u8>()`.
    pub fn to_u8(&self) -> Result<SparseDFA<Vec<u8>, u8>> {
        self.to_sized::<u8>()
    }

    /// Create a new sparse DFA with the same match semantics as this one,
    /// but attempt to use `u16` for the representation of state identifiers.
    /// Returns an error if `u16` is insufficient to represent all state
    /// identifiers in this DFA.
    ///
    /// This is a convenience routine for `to_sized::<u16>()`.
    pub fn to_u16(&self) -> Result<SparseDFA<Vec<u8>, u16>> {
        self.to_sized::<u16>()
    }

    /// Create a new sparse DFA with the same match semantics as this one,
    /// but attempt to use `u32` for the representation of state identifiers.
    /// Returns an error if `u32` is insufficient to represent all state
    /// identifiers in this DFA.
    ///
    /// This is a convenience routine for `to_sized::<u32>()`.
    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
    pub fn to_u32(&self) -> Result<SparseDFA<Vec<u8>, u32>> {
        self.to_sized::<u32>()
    }

    /// Create a new sparse DFA with the same match semantics as this one,
    /// but attempt to use `u64` for the representation of state identifiers.
    /// Returns an error if `u64` is insufficient to represent all state
    /// identifiers in this DFA.
    ///
    /// This is a convenience routine for `to_sized::<u64>()`.
    #[cfg(target_pointer_width = "64")]
    pub fn to_u64(&self) -> Result<SparseDFA<Vec<u8>, u64>> {
        self.to_sized::<u64>()
    }

    /// Create a new sparse DFA with the same match semantics as this one,
    /// but attempt to use `A` for the representation of state identifiers.
    /// Returns an error if `A` is insufficient to represent all state
    /// identifiers in this DFA.
    ///
    /// An alternative way to construct such a DFA is to use
    /// [`DenseDFA::to_sparse_sized`](enum.DenseDFA.html#method.to_sparse_sized).
    /// In general, picking the appropriate size upon initial construction of
    /// a sparse DFA is preferred, since it does the conversion in one step
    /// instead of two.
    pub fn to_sized<A: StateID>(&self) -> Result<SparseDFA<Vec<u8>, A>> {
        let resized: Repr<Vec<u8>, A> = self.repr().to_sized::<A>()?;
        Ok(resized.into_sparse_dfa())
    }

    /// Serialize a sparse DFA to raw bytes in little endian format.
    ///
    /// Returns an error if the size of this DFA's state identifier
    /// representation is not 1, 2, 4 or 8 bytes. All implementations of
    /// `StateID` provided by this crate satisfy this requirement.
    pub fn to_bytes_little_endian(&self) -> Result<Vec<u8>> {
        let repr = self.repr();
        repr.to_bytes::<LittleEndian>()
    }

    /// Serialize a sparse DFA to raw bytes in big endian format.
    ///
    /// Returns an error if the size of this DFA's state identifier
    /// representation is not 1, 2, 4 or 8 bytes. All implementations of
    /// `StateID` provided by this crate satisfy this requirement.
    pub fn to_bytes_big_endian(&self) -> Result<Vec<u8>> {
        let repr = self.repr();
        repr.to_bytes::<BigEndian>()
    }

    /// Serialize a sparse DFA to raw bytes in native endian format.
    /// Generally, it is better to pick an explicit endianness using either
    /// `to_bytes_little_endian` or `to_bytes_big_endian`. This routine is
    /// useful in tests where the DFA is serialized and deserialized on the
    /// same platform.
    ///
    /// Returns an error if the size of this DFA's state identifier
    /// representation is not 1, 2, 4 or 8 bytes. All implementations of
    /// `StateID` provided by this crate satisfy this requirement.
    pub fn to_bytes_native_endian(&self) -> Result<Vec<u8>> {
        let repr = self.repr();
        repr.to_bytes::<NativeEndian>()
    }
}
344
345	impl<'a, S: StateID> SparseDFA<&'a [u8], S> {
346	/// Deserialize a sparse DFA with a specific state identifier
347	/// representation.
348	///
349	/// Deserializing a DFA using this routine will never allocate heap memory.
350	/// This is also guaranteed to be a constant time operation that does not
351	/// vary with the size of the DFA.
352	///
353	/// The bytes given should be generated by the serialization of a DFA with
354	/// either the
355	/// [`to_bytes_little_endian`](enum.DenseDFA.html#method.to_bytes_little_endian)
356	/// method or the
357	/// [`to_bytes_big_endian`](enum.DenseDFA.html#method.to_bytes_big_endian)
358	/// endian, depending on the endianness of the machine you are
359	/// deserializing this DFA from.
360	///
361	/// If the state identifier representation is `usize`, then deserialization
362	/// is dependent on the pointer size. For this reason, it is best to
363	/// serialize DFAs using a fixed size representation for your state
364	/// identifiers, such as `u8`, `u16`, `u32` or `u64`.
365	///
366	/// # Panics
367	///
368	/// The bytes given should be trusted. In particular, if the bytes
369	/// are not a valid serialization of a DFA, or if the endianness of the
370	/// serialized bytes is different than the endianness of the machine that
371	/// is deserializing the DFA, then this routine will panic. Moreover, it
372	/// is possible for this deserialization routine to succeed even if the
373	/// given bytes do not represent a valid serialized sparse DFA.
374	///
375	/// # Safety
376	///
377	/// This routine is unsafe because it permits callers to provide an
378	/// arbitrary transition table with possibly incorrect transitions. While
379	/// the various serialization routines will never return an incorrect
380	/// transition table, there is no guarantee that the bytes provided here
381	/// are correct. While deserialization does many checks (as documented
382	/// above in the panic conditions), this routine does not check that the
383	/// transition table is correct. Given an incorrect transition table, it is
384	/// possible for the search routines to access out-of-bounds memory because
385	/// of explicit bounds check elision.
386	///
387	/// # Example
388	///
389	/// This example shows how to serialize a DFA to raw bytes, deserialize it
390	/// and then use it for searching. Note that we first convert the DFA to
391	/// using `u16` for its state identifier representation before serializing
392	/// it. While this isn't strictly necessary, it's good practice in order to
393	/// decrease the size of the DFA and to avoid platform specific pitfalls
394	/// such as differing pointer sizes.
395	///
396	/// ```
397	/// use regex_automata::{DFA, DenseDFA, SparseDFA};
398	///
399	/// # fn example() -> Result<(), regex_automata::Error> {
400	/// let sparse = SparseDFA::new("foo[0-9]+")?;
401	/// let bytes = sparse.to_u16()?.to_bytes_native_endian()?;
402	///
403	/// let dfa: SparseDFA<&[u8], u16> = unsafe {
404	/// SparseDFA::from_bytes(&bytes)
405	/// };
406	///
407	/// assert_eq!(Some(`8`), dfa.find(b"foo12345"));
408	/// # Ok(()) }; example().unwrap()
409	/// ```
410	pub unsafe fn from_bytes(buf: &'a [u8]) -> SparseDFA<&'a [u8], S> {
411	Repr::from_bytes(buf).into_sparse_dfa()
412	}
413	}
414
415	impl<T: AsRef<[u8]>, S: StateID> DFA for SparseDFA<T, S> {
416	type ID = S;
417
418	#[inline]
419	fn start_state(&self) -> S {
420	self.repr().start_state()
421	}
422
423	#[inline]
424	fn is_match_state(&self, id: S) -> bool {
425	self.repr().is_match_state(id)
426	}
427
428	#[inline]
429	fn is_dead_state(&self, id: S) -> bool {
430	self.repr().is_dead_state(id)
431	}
432
433	#[inline]
434	fn is_match_or_dead_state(&self, id: S) -> bool {
435	self.repr().is_match_or_dead_state(id)
436	}
437
438	#[inline]
439	fn is_anchored(&self) -> bool {
440	self.repr().is_anchored()
441	}
442
443	#[inline]
444	fn next_state(&self, current: S, input: u8) -> S {
445	match *self {
446	SparseDFA::Standard(ref r) => r.next_state(current, input),
447	SparseDFA::ByteClass(ref r) => r.next_state(current, input),
448	SparseDFA::__Nonexhaustive => unreachable!(),
449	}
450	}
451
452	#[inline]
453	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
454	self.next_state(current, input)
455	}
456
457	// We specialize the following methods because it lets us lift the
458	// case analysis between the different types of sparse DFAs. Instead of
459	// doing the case analysis for every transition, we do it once before
460	// searching. For sparse DFAs, this doesn't seem to benefit performance as
461	// much as it does for the dense DFAs, but it's easy to do so we might as
462	// well do it.
463
464	#[inline]
465	fn is_match_at(&self, bytes: &[u8], start: usize) -> bool {
466	match *self {
467	SparseDFA::Standard(ref r) => r.is_match_at(bytes, start),
468	SparseDFA::ByteClass(ref r) => r.is_match_at(bytes, start),
469	SparseDFA::__Nonexhaustive => unreachable!(),
470	}
471	}
472
473	#[inline]
474	fn shortest_match_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
475	match *self {
476	SparseDFA::Standard(ref r) => r.shortest_match_at(bytes, start),
477	SparseDFA::ByteClass(ref r) => r.shortest_match_at(bytes, start),
478	SparseDFA::__Nonexhaustive => unreachable!(),
479	}
480	}
481
482	#[inline]
483	fn find_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
484	match *self {
485	SparseDFA::Standard(ref r) => r.find_at(bytes, start),
486	SparseDFA::ByteClass(ref r) => r.find_at(bytes, start),
487	SparseDFA::__Nonexhaustive => unreachable!(),
488	}
489	}
490
491	#[inline]
492	fn rfind_at(&self, bytes: &[u8], start: usize) -> Option<usize> {
493	match *self {
494	SparseDFA::Standard(ref r) => r.rfind_at(bytes, start),
495	SparseDFA::ByteClass(ref r) => r.rfind_at(bytes, start),
496	SparseDFA::__Nonexhaustive => unreachable!(),
497	}
498	}
499	}
500
501	/// A standard sparse DFA that does not use premultiplication or byte classes.
502	///
503	/// Generally, it isn't necessary to use this type directly, since a
504	/// `SparseDFA` can be used for searching directly. One possible reason why
505	/// one might want to use this type directly is if you are implementing your
506	/// own search routines by walking a DFA's transitions directly. In that case,
507	/// you'll want to use this type (or any of the other DFA variant types)
508	/// directly, since they implement `next_state` more efficiently.
509	#[derive(Clone, Debug)]
510	pub struct Standard<T: AsRef<[u8]>, S: StateID = usize>(Repr<T, S>);
511
512	impl<T: AsRef<[u8]>, S: StateID> DFA for Standard<T, S> {
513	type ID = S;
514
515	#[inline]
516	fn start_state(&self) -> S {
517	self.0.start_state()
518	}
519
520	#[inline]
521	fn is_match_state(&self, id: S) -> bool {
522	self.0.is_match_state(id)
523	}
524
525	#[inline]
526	fn is_dead_state(&self, id: S) -> bool {
527	self.0.is_dead_state(id)
528	}
529
530	#[inline]
531	fn is_match_or_dead_state(&self, id: S) -> bool {
532	self.0.is_match_or_dead_state(id)
533	}
534
535	#[inline]
536	fn is_anchored(&self) -> bool {
537	self.0.is_anchored()
538	}
539
540	#[inline]
541	fn next_state(&self, current: S, input: u8) -> S {
542	self.0.state(current).next(input)
543	}
544
545	#[inline]
546	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
547	self.next_state(current, input)
548	}
549	}
550
551	/// A sparse DFA that shrinks its alphabet.
552	///
553	/// Alphabet shrinking is achieved by using a set of equivalence classes
554	/// instead of using all possible byte values. Any two bytes belong to the same
555	/// equivalence class if and only if they can be used interchangeably anywhere
556	/// in the DFA while never discriminating between a match and a non-match.
557	///
558	/// Unlike dense DFAs, sparse DFAs do not tend to benefit nearly as much from
559	/// using byte classes. In some cases, using byte classes can even marginally
560	/// increase the size of a sparse DFA's transition table. The reason for this
561	/// is that a sparse DFA already compacts each state's transitions separate
562	/// from whether byte classes are used.
563	///
564	/// Generally, it isn't necessary to use this type directly, since a
565	/// `SparseDFA` can be used for searching directly. One possible reason why
566	/// one might want to use this type directly is if you are implementing your
567	/// own search routines by walking a DFA's transitions directly. In that case,
568	/// you'll want to use this type (or any of the other DFA variant types)
569	/// directly, since they implement `next_state` more efficiently.
570	#[derive(Clone, Debug)]
571	pub struct ByteClass<T: AsRef<[u8]>, S: StateID = usize>(Repr<T, S>);
572
573	impl<T: AsRef<[u8]>, S: StateID> DFA for ByteClass<T, S> {
574	type ID = S;
575
576	#[inline]
577	fn start_state(&self) -> S {
578	self.0.start_state()
579	}
580
581	#[inline]
582	fn is_match_state(&self, id: S) -> bool {
583	self.0.is_match_state(id)
584	}
585
586	#[inline]
587	fn is_dead_state(&self, id: S) -> bool {
588	self.0.is_dead_state(id)
589	}
590
591	#[inline]
592	fn is_match_or_dead_state(&self, id: S) -> bool {
593	self.0.is_match_or_dead_state(id)
594	}
595
596	#[inline]
597	fn is_anchored(&self) -> bool {
598	self.0.is_anchored()
599	}
600
601	#[inline]
602	fn next_state(&self, current: S, input: u8) -> S {
603	let input = self.0.byte_classes.get(input);
604	self.0.state(current).next(input)
605	}
606
607	#[inline]
608	unsafe fn next_state_unchecked(&self, current: S, input: u8) -> S {
609	self.next_state(current, input)
610	}
611	}
612
613	/// The underlying representation of a sparse DFA. This is shared by all of
614	/// the different variants of a sparse DFA.
615	#[derive(Clone)]
616	#[cfg_attr(not(feature = "std"), derive(Debug))]
617	struct Repr<T: AsRef<[u8]>, S: StateID = usize> {
618	anchored: bool,
619	start: S,
620	state_count: usize,
621	max_match: S,
622	byte_classes: ByteClasses,
623	trans: T,
624	}
625
626	impl<T: AsRef<[u8]>, S: StateID> Repr<T, S> {
627	fn into_sparse_dfa(self) -> SparseDFA<T, S> {
628	if self.byte_classes.is_singleton() {
629	SparseDFA::Standard(Standard(self))
630	} else {
631	SparseDFA::ByteClass(ByteClass(self))
632	}
633	}
634
635	fn as_ref<'a>(&'a self) -> Repr<&'a [u8], S> {
636	Repr {
637	anchored: self.anchored,
638	start: self.start,
639	state_count: self.state_count,
640	max_match: self.max_match,
641	byte_classes: self.byte_classes.clone(),
642	trans: self.trans(),
643	}
644	}
645
646	#[cfg(feature = "std")]
647	fn to_owned(&self) -> Repr<Vec<u8>, S> {
648	Repr {
649	anchored: self.anchored,
650	start: self.start,
651	state_count: self.state_count,
652	max_match: self.max_match,
653	byte_classes: self.byte_classes.clone(),
654	trans: self.trans().to_vec(),
655	}
656	}
657
658	/// Return a convenient representation of the given state.
659	///
660	/// This is marked as inline because it doesn't seem to get inlined
661	/// otherwise, which leads to a fairly significant performance loss (~25%).
662	#[inline]
663	fn state<'a>(&'a self, id: S) -> State<'a, S> {
664	let mut pos = id.to_usize();
665	let ntrans = NativeEndian::read_u16(&self.trans()[pos..]) as usize;
666	pos += `2`;
667	let input_ranges = &self.trans()[pos..pos + (ntrans * `2`)];
668	pos += `2` * ntrans;
669	let next = &self.trans()[pos..pos + (ntrans * size_of::<S>())];
670	State { _state_id_repr: PhantomData, ntrans, input_ranges, next }
671	}
672
673	/// Return an iterator over all of the states in this DFA.
674	///
675	/// The iterator returned yields tuples, where the first element is the
676	/// state ID and the second element is the state itself.
677	#[cfg(feature = "std")]
678	fn states<'a>(&'a self) -> StateIter<'a, T, S> {
679	StateIter { dfa: self, id: dead_id() }
680	}
681
682	fn memory_usage(&self) -> usize {
683	self.trans().len()
684	}
685
686	fn start_state(&self) -> S {
687	self.start
688	}
689
690	fn is_match_state(&self, id: S) -> bool {
691	self.is_match_or_dead_state(id) && !self.is_dead_state(id)
692	}
693
694	fn is_dead_state(&self, id: S) -> bool {
695	id == dead_id()
696	}
697
698	fn is_match_or_dead_state(&self, id: S) -> bool {
699	id <= self.max_match
700	}
701
702	fn is_anchored(&self) -> bool {
703	self.anchored
704	}
705
706	fn trans(&self) -> &[u8] {
707	self.trans.as_ref()
708	}
709
710	/// Create a new sparse DFA whose match semantics are equivalent to this
711	/// DFA, but attempt to use `A` for the representation of state
712	/// identifiers. If `A` is insufficient to represent all state identifiers
713	/// in this DFA, then this returns an error.
714	#[cfg(feature = "std")]
715	fn to_sized<A: StateID>(&self) -> Result<Repr<Vec<u8>, A>> {
716	// To build the new DFA, we proceed much like the initial construction
717	// of the sparse DFA. Namely, since the state ID size is changing,
718	// we don't actually know all of our state IDs until we've allocated
719	// all necessary space. So we do one pass that allocates all of the
720	// storage we need, and then another pass to fill in the transitions.
721
722	let mut trans = Vec::with_capacity(size_of::<A>() * self.state_count);
723	let mut map: HashMap<S, A> = HashMap::with_capacity(self.state_count);
724	for (old_id, state) in self.states() {
725	let pos = trans.len();
726	map.insert(old_id, usize_to_state_id(pos)?);
727
728	let n = state.ntrans;
729	let zeros = `2` + (n * `2`) + (n * size_of::<A>());
730	trans.extend(iter::repeat(`0`).take(zeros));
731
732	NativeEndian::write_u16(&mut trans[pos..], n as u16);
733	let (s, e) = (pos + `2`, pos + `2` + (n * `2`));
734	trans[s..e].copy_from_slice(state.input_ranges);
735	}
736
737	let mut new = Repr {
738	anchored: self.anchored,
739	start: map[&self.start],
740	state_count: self.state_count,
741	max_match: map[&self.max_match],
742	byte_classes: self.byte_classes.clone(),
743	trans,
744	};
745	for (&old_id, &new_id) in map.iter() {
746	let old_state = self.state(old_id);
747	let mut new_state = new.state_mut(new_id);
748	for i in `0`..new_state.ntrans {
749	let next = map[&old_state.next_at(i)];
750	new_state.set_next_at(i, usize_to_state_id(next.to_usize())?);
751	}
752	}
753	new.start = map[&self.start];
754	new.max_match = map[&self.max_match];
755	Ok(new)
756	}
757
758	/// Serialize a sparse DFA to raw bytes using the provided endianness.
759	///
760	/// If the state identifier representation of this DFA has a size different
761	/// than 1, 2, 4 or 8 bytes, then this returns an error. All
762	/// implementations of `StateID` provided by this crate satisfy this
763	/// requirement.
764	///
765	/// Unlike dense DFAs, the result is not necessarily aligned since a
766	/// sparse DFA's transition table is always read as a sequence of bytes.
767	#[cfg(feature = "std")]
768	fn to_bytes<A: ByteOrder>(&self) -> Result<Vec<u8>> {
769	let label = b"rust-regex-automata-sparse-dfa`\x00`";
770	let size =
771	// For human readable label.
772	label.len()
773	// endiannes check, must be equal to 0xFEFF for native endian
774	+ `2`
775	// For version number.
776	+ `2`
777	// Size of state ID representation, in bytes.
778	// Must be 1, 2, 4 or 8.
779	+ `2`
780	// For DFA misc options. (Currently unused.)
781	+ `2`
782	// For start state.
783	+ `8`
784	// For state count.
785	+ `8`
786	// For max match state.
787	+ `8`
788	// For byte class map.
789	+ `256`
790	// For transition table.
791	+ self.trans().len();
792
793	let mut i = `0`;
794	let mut buf = vec![`0`; size];
795
796	// write label
797	for &b in label {
798	buf[i] = b;
799	i += `1`;
800	}
801	// endianness check
802	A::write_u16(&mut buf[i..], `0xFEFF`);
803	i += `2`;
804	// version number
805	A::write_u16(&mut buf[i..], `1`);
806	i += `2`;
807	// size of state ID
808	let state_size = size_of::<S>();
809	if ![`1`, `2`, `4`, `8`].contains(&state_size) {
810	return Err(Error::serialize(&format!(
811	"state size of {} not supported, must be 1, 2, 4 or 8",
812	state_size
813	)));
814	}
815	A::write_u16(&mut buf[i..], state_size as u16);
816	i += `2`;
817	// DFA misc options
818	let mut options = `0u16`;
819	if self.anchored {
820	options \|= dense::MASK_ANCHORED;
821	}
822	A::write_u16(&mut buf[i..], options);
823	i += `2`;
824	// start state
825	A::write_u64(&mut buf[i..], self.start.to_usize() as u64);
826	i += `8`;
827	// state count
828	A::write_u64(&mut buf[i..], self.state_count as u64);
829	i += `8`;
830	// max match state
831	A::write_u64(&mut buf[i..], self.max_match.to_usize() as u64);
832	i += `8`;
833	// byte class map
834	for b in (`0`..`256`).map(\|b\| b as u8) {
835	buf[i] = self.byte_classes.get(b);
836	i += `1`;
837	}
838	// transition table
839	for (_, state) in self.states() {
840	A::write_u16(&mut buf[i..], state.ntrans as u16);
841	i += `2`;
842	buf[i..i + (state.ntrans * `2`)].copy_from_slice(state.input_ranges);
843	i += state.ntrans * `2`;
844	for j in `0`..state.ntrans {
845	write_state_id_bytes::<A, _>(&mut buf[i..], state.next_at(j));
846	i += size_of::<S>();
847	}
848	}
849
850	assert_eq!(size, i, "expected to consume entire buffer");
851
852	Ok(buf)
853	}
854	}
855
856	impl<'a, S: StateID> Repr<&'a [u8], S> {
857	/// The implementation for deserializing a sparse DFA from raw bytes.
858	unsafe fn from_bytes(mut buf: &'a [u8]) -> Repr<&'a [u8], S> {
859	// skip over label
860	match buf.iter().position(\|&b\| b == b'`\x00`') {
861	None => panic!("could not find label"),
862	Some(i) => buf = &buf[i + `1`..],
863	}
864
865	// check that current endianness is same as endianness of DFA
866	let endian_check = NativeEndian::read_u16(buf);
867	buf = &buf[`2`..];
868	if endian_check != `0xFEFF` {
869	panic!(
870	"endianness mismatch, expected 0xFEFF but got 0x{:X}. \
871	are you trying to load a SparseDFA serialized with a \
872	different endianness?",
873	endian_check,
874	);
875	}
876
877	// check that the version number is supported
878	let version = NativeEndian::read_u16(buf);
879	buf = &buf[`2`..];
880	if version != `1` {
881	panic!(
882	"expected version 1, but found unsupported version {}",
883	version,
884	);
885	}
886
887	// read size of state
888	let state_size = NativeEndian::read_u16(buf) as usize;
889	if state_size != size_of::<S>() {
890	panic!(
891	"state size of SparseDFA ({}) does not match \
892	requested state size ({})",
893	state_size,
894	size_of::<S>(),
895	);
896	}
897	buf = &buf[`2`..];
898
899	// read miscellaneous options
900	let opts = NativeEndian::read_u16(buf);
901	buf = &buf[`2`..];
902
903	// read start state
904	let start = S::from_usize(NativeEndian::read_u64(buf) as usize);
905	buf = &buf[`8`..];
906
907	// read state count
908	let state_count = NativeEndian::read_u64(buf) as usize;
909	buf = &buf[`8`..];
910
911	// read max match state
912	let max_match = S::from_usize(NativeEndian::read_u64(buf) as usize);
913	buf = &buf[`8`..];
914
915	// read byte classes
916	let byte_classes = ByteClasses::from_slice(&buf[..`256`]);
917	buf = &buf[`256`..];
918
919	Repr {
920	anchored: opts & dense::MASK_ANCHORED > `0`,
921	start,
922	state_count,
923	max_match,
924	byte_classes,
925	trans: buf,
926	}
927	}
928	}
929
#[cfg(feature = "std")]
impl<S: StateID> Repr<Vec<u8>, S> {
    /// Converts a dense DFA into a sparse DFA whose state identifiers use
    /// the representation `A`.
    ///
    /// A sparse state identifier is the byte offset of the state inside the
    /// transition table, so no identifier is known until every state before
    /// it has been laid out. The conversion therefore runs twice over the
    /// dense states:
    ///
    /// * The first pass writes each state's transition count and input byte
    ///   ranges, reserves zeroed room for its outgoing transitions, and
    ///   records which sparse offset each dense state landed on.
    /// * The second pass overwrites the reserved room with the remapped
    ///   target identifiers.
    ///
    /// An error is returned when a state's offset is rejected by
    /// `usize_to_state_id` for `A`.
    fn from_dense_sized<T: AsRef<[S]>, A: StateID>(
        dfa: &dense::Repr<T, S>,
    ) -> Result<Repr<Vec<u8>, A>> {
        let dense_states = dfa.state_count();
        let mut table: Vec<u8> =
            Vec::with_capacity(size_of::<A>() * dense_states);
        // Indexed by dense state index; holds the sparse ID of that state.
        let mut old_to_new: Vec<A> = vec![dead_id(); dense_states];

        // Pass one: lay out every state and learn its offset.
        for (dense_id, dense_state) in dfa.states() {
            let offset = table.len();
            old_to_new[dfa.state_id_to_index(dense_id)] =
                usize_to_state_id(offset)?;

            // Placeholder for the transition count, patched below once the
            // ranges have been counted.
            table.extend_from_slice(&[0, 0]);

            let mut ntrans: u16 = 0;
            for (range_start, range_end, _) in
                dense_state.sparse_transitions()
            {
                table.push(range_start);
                table.push(range_end);
                ntrans += 1;
            }
            NativeEndian::write_u16(&mut table[offset..], ntrans);

            // Zeroed room for one target identifier per transition.
            let padded_len = table.len() + ntrans as usize * size_of::<A>();
            table.resize(padded_len, 0);
        }

        let mut sparse = Repr {
            anchored: dfa.is_anchored(),
            start: old_to_new[dfa.state_id_to_index(dfa.start_state())],
            state_count: dense_states,
            max_match: old_to_new
                [dfa.state_id_to_index(dfa.max_match_state())],
            byte_classes: dfa.byte_classes().clone(),
            trans: table,
        };

        // Pass two: every offset is known now, so fill in the targets.
        for (dense_id, dense_state) in dfa.states() {
            let sparse_id = old_to_new[dfa.state_id_to_index(dense_id)];
            let mut sparse_state = sparse.state_mut(sparse_id);
            for (slot, (_, _, dense_target)) in
                dense_state.sparse_transitions().enumerate()
            {
                let target = old_to_new[dfa.state_id_to_index(dense_target)];
                sparse_state.set_next_at(slot, target);
            }
        }
        Ok(sparse)
    }

    /// Returns a mutable view of the state stored at offset `id` in the
    /// transition table.
    ///
    /// The state is decoded as a native endian `u16` transition count,
    /// followed by two range bytes per transition, followed by one
    /// `size_of::<S>()` sized target slot per transition.
    fn state_mut<'a>(&'a mut self, id: S) -> StateMut<'a, S> {
        let header = id.to_usize();
        let ntrans = NativeEndian::read_u16(&self.trans[header..]) as usize;

        let ranges_len = ntrans * 2;
        let body_start = header + 2;
        let body_end = body_start + ranges_len + ntrans * size_of::<S>();

        let (input_ranges, next) =
            self.trans[body_start..body_end].split_at_mut(ranges_len);
        StateMut { _state_id_repr: PhantomData, ntrans, input_ranges, next }
    }
}
1009
#[cfg(feature = "std")]
impl<T: AsRef<[u8]>, S: StateID> fmt::Debug for Repr<T, S> {
    /// Writes one line per state between a `SparseDFA(` header and a
    /// closing `)`.
    ///
    /// Each line is a two character status, the state ID zero padded to six
    /// digits, and the state's own `Debug` rendering. The first status
    /// character is `D` for the dead state, `>` for the start state and a
    /// space otherwise; the second is `*` for match states and a space
    /// otherwise.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Every status is exactly two characters wide so that the state IDs
        // line up in a single column.
        fn state_status<T: AsRef<[u8]>, S: StateID>(
            dfa: &Repr<T, S>,
            id: S,
        ) -> &'static str {
            let is_match = dfa.is_match_state(id);
            if id == dead_id() {
                if is_match {
                    "D*"
                } else {
                    "D "
                }
            } else if id == dfa.start_state() {
                if is_match {
                    ">*"
                } else {
                    "> "
                }
            } else if is_match {
                " *"
            } else {
                "  "
            }
        }

        writeln!(f, "SparseDFA(")?;
        for (id, state) in self.states() {
            let status = state_status(self, id);
            writeln!(f, "{}{:06}: {:?}", status, id.to_usize(), state)?;
        }
        writeln!(f, ")")
    }
}
1047
/// Walks every state of a sparse DFA in the order the states are laid out
/// in the transition table.
///
/// Each item is a `(state ID, state)` pair.
#[cfg(feature = "std")]
#[derive(Debug)]
struct StateIter<'a, T, S = usize>
where
    T: AsRef<[u8]> + 'a,
    S: StateID + 'a,
{
    /// The DFA whose states are being visited.
    dfa: &'a Repr<T, S>,
    /// The ID of the state that the next call to `next` will yield.
    id: S,
}
1058
#[cfg(feature = "std")]
impl<'a, T: AsRef<[u8]>, S: StateID> Iterator for StateIter<'a, T, S> {
    type Item = (S, State<'a, S>);

    /// Yields the state at the current offset and then advances by that
    /// state's encoded length. Iteration ends once the offset reaches the
    /// end of the transition table.
    fn next(&mut self) -> Option<(S, State<'a, S>)> {
        let current = self.id;
        let offset = current.to_usize();
        if offset >= self.dfa.trans().len() {
            return None;
        }

        let state = self.dfa.state(current);
        // States are stored back to back, so the next one begins right
        // after this one's bytes.
        self.id = S::from_usize(offset + state.bytes());
        Some((current, state))
    }
}
1073
1074	/// A representation of a sparse DFA state that can be cheaply materialized
1075	/// from a state identifier.
1076	#[derive(Clone)]
1077	struct State<'a, S: StateID = usize> {
1078	/// The state identifier representation used by the DFA from which this
1079	/// state was extracted. Since our transition table is compacted in a
1080	/// &[u8], we don't actually use the state ID type parameter explicitly
1081	/// anywhere, so we fake it. This prevents callers from using an incorrect
1082	/// state ID representation to read from this state.
1083	_state_id_repr: PhantomData<S>,
1084	/// The number of transitions in this state.
1085	ntrans: usize,
1086	/// Pairs of input ranges, where there is one pair for each transition.
1087	/// Each pair specifies an inclusive start and end byte range for the
1088	/// corresponding transition.
1089	input_ranges: &'a [u8],
1090	/// Transitions to the next state. This slice contains native endian
1091	/// encoded state identifiers, with `S` as the representation. Thus, there
1092	/// are `ntrans size_of::<S>()` bytes in this slice.*
1093	next: &'a [u8],
1094	}
1095
1096	impl<'a, S: StateID> State<'a, S> {
1097	/// Searches for the next transition given an input byte. If no such
1098	/// transition could be found, then a dead state is returned.
1099	fn next(&self, input: u8) -> S {
1100	// This straight linear search was observed to be much better than
1101	// binary search on ASCII haystacks, likely because a binary search
1102	// visits the ASCII case last but a linear search sees it first. A
1103	// binary search does do a little better on non-ASCII haystacks, but
1104	// not by much. There might be a better trade off lurking here.
1105	for i in `0`..self.ntrans {
1106	let (start, end) = self.range(i);
1107	if start <= input && input <= end {
1108	return self.next_at(i);
1109	}
1110	// We could bail early with an extra branch: if input < b1, then
1111	// we know we'll never find a matching transition. Interestingly,
1112	// this extra branch seems to not help performance, or will even
1113	// hurt it. It's likely very dependent on the DFA itself and what
1114	// is being searched.
1115	}
1116	dead_id()
1117	}
1118
1119	/// Returns the inclusive input byte range for the ith transition in this
1120	/// state.
1121	fn range(&self, i: usize) -> (u8, u8) {
1122	(self.input_ranges[i * `2`], self.input_ranges[i * `2` + `1`])
1123	}
1124
1125	/// Returns the next state for the ith transition in this state.
1126	fn next_at(&self, i: usize) -> S {
1127	S::read_bytes(&self.next[i * size_of::<S>()..])
1128	}
1129
1130	/// Return the total number of bytes that this state consumes in its
1131	/// encoded form.
1132	#[cfg(feature = "std")]
1133	fn bytes(&self) -> usize {
1134	`2` + (self.ntrans * `2`) + (self.ntrans * size_of::<S>())
1135	}
1136	}
1137
#[cfg(feature = "std")]
impl<'a, S: StateID> fmt::Debug for State<'a, S> {
    /// Renders the state's live transitions as a comma separated list of
    /// `byte => target` or `start-end => target` entries. Transitions into
    /// the dead state are omitted.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered: Vec<String> = (0..self.ntrans)
            .filter_map(|i| {
                let target = self.next_at(i);
                if target == dead_id() {
                    return None;
                }

                let (start, end) = self.range(i);
                let entry = if start == end {
                    format!("{} => {}", escape(start), target.to_usize())
                } else {
                    format!(
                        "{}-{} => {}",
                        escape(start),
                        escape(end),
                        target.to_usize(),
                    )
                };
                Some(entry)
            })
            .collect();
        write!(f, "{}", rendered.join(", "))
    }
}
1167
/// A writable view of a single sparse DFA state, cheap to build from a
/// state identifier.
#[cfg(feature = "std")]
struct StateMut<'a, S = usize>
where
    S: StateID,
{
    /// Marker for the state identifier representation of the DFA this view
    /// was taken from. The table itself is only a `&[u8]`, so nothing else
    /// in this type mentions `S`; carrying the marker keeps callers from
    /// decoding the state with a mismatched representation.
    _state_id_repr: PhantomData<S>,
    /// How many transitions this state has.
    ntrans: usize,
    /// Two bytes per transition: the inclusive start and the inclusive end
    /// of the input byte range that selects that transition.
    input_ranges: &'a mut [u8],
    /// The target state identifiers, native endian encoded using `S` as the
    /// representation. This slice therefore holds
    /// `ntrans * size_of::<S>()` bytes.
    next: &'a mut [u8],
}
1189
#[cfg(feature = "std")]
impl<'a, S: StateID> StateMut<'a, S> {
    /// Overwrites the target of the `i`th transition with `next`.
    fn set_next_at(&mut self, i: usize, next: S) {
        // Target slots are packed back to back, one state ID wide each.
        let offset = i * size_of::<S>();
        let slot = &mut self.next[offset..];
        next.write_bytes(slot);
    }
}
1197
#[cfg(feature = "std")]
impl<'a, S: StateID> fmt::Debug for StateMut<'a, S> {
    /// Formats exactly like the read-only `State` by reborrowing this
    /// state's slices as shared and delegating to that implementation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let view = State {
            _state_id_repr: self._state_id_repr,
            ntrans: self.ntrans,
            input_ranges: &*self.input_ranges,
            next: &*self.next,
        };
        <State<S> as fmt::Debug>::fmt(&view, f)
    }
}
1210
1211	/// Return the given byte as its escaped string form.
1212	#[cfg(feature = "std")]
fn escape(b: u8) -> String {
    use std::ascii;

    // `ascii::escape_default` only ever yields ASCII bytes, so each one maps
    // directly onto a `char`. Collecting straight into a `String` avoids
    // both the intermediate `Vec` and a UTF-8 check that cannot fail.
    ascii::escape_default(b).map(char::from).collect()
}
1218
/// A binary search routine specialized to a sparse DFA state's transitions.
///
/// `ranges` is a flat slice of byte pairs, where each pair is the inclusive
/// start and end of one transition's input range. The return value is the
/// position of the pair containing `needle` (that is, the index of the
/// matching transition), or `None` when no pair contains it.
///
/// As with any binary search, the result is only meaningful when the pairs
/// are sorted in ascending order and do not overlap.
///
/// Note that this routine is not currently used since it was observed to
/// either decrease performance when searching ASCII, or did not provide
/// enough of a boost on non-ASCII haystacks to be worth it. However, we
/// leave it here for posterity in case we can find a way to use it.
///
/// In theory, we could use the standard library's search routine if we
/// could cast a `&[u8]` to a `&[(u8, u8)]`, but I don't believe this is
/// currently guaranteed to be safe and is thus UB (since I don't think the
/// in-memory representation of `(u8, u8)` has been nailed down).
#[inline(always)]
#[allow(dead_code)]
fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option<usize> {
    debug_assert!(ranges.len() % 2 == 0, "ranges must have even length");
    debug_assert!(ranges.len() <= 512, "ranges should be short");

    // `left` and `right` count pairs, not bytes.
    let (mut left, mut right) = (0, ranges.len() / 2);
    while left < right {
        let mid = left + (right - left) / 2;
        let (b1, b2) = (ranges[mid * 2], ranges[mid * 2 + 1]);
        if needle < b1 {
            right = mid;
        } else if needle > b2 {
            left = mid + 1;
        } else {
            return Some(mid);
        }
    }
    None
}
1257