1 | use std::mem::size_of; |
2 | |
3 | use crate::ahocorasick::MatchKind; |
4 | use crate::automaton::Automaton; |
5 | use crate::classes::ByteClasses; |
6 | use crate::error::Result; |
7 | use crate::nfa::{PatternID, PatternLength, NFA}; |
8 | use crate::prefilter::{Prefilter, PrefilterObj, PrefilterState}; |
9 | use crate::state_id::{dead_id, fail_id, premultiply_overflow_error, StateID}; |
10 | use crate::Match; |
11 | |
12 | #[derive (Clone, Debug)] |
13 | pub enum DFA<S> { |
14 | Standard(Standard<S>), |
15 | ByteClass(ByteClass<S>), |
16 | Premultiplied(Premultiplied<S>), |
17 | PremultipliedByteClass(PremultipliedByteClass<S>), |
18 | } |
19 | |
20 | impl<S: StateID> DFA<S> { |
21 | fn repr(&self) -> &Repr<S> { |
22 | match *self { |
23 | DFA::Standard(ref dfa) => dfa.repr(), |
24 | DFA::ByteClass(ref dfa) => dfa.repr(), |
25 | DFA::Premultiplied(ref dfa) => dfa.repr(), |
26 | DFA::PremultipliedByteClass(ref dfa) => dfa.repr(), |
27 | } |
28 | } |
29 | |
30 | pub fn match_kind(&self) -> &MatchKind { |
31 | &self.repr().match_kind |
32 | } |
33 | |
34 | pub fn heap_bytes(&self) -> usize { |
35 | self.repr().heap_bytes |
36 | } |
37 | |
38 | pub fn max_pattern_len(&self) -> usize { |
39 | self.repr().max_pattern_len |
40 | } |
41 | |
42 | pub fn pattern_count(&self) -> usize { |
43 | self.repr().pattern_count |
44 | } |
45 | |
46 | pub fn prefilter(&self) -> Option<&dyn Prefilter> { |
47 | self.repr().prefilter.as_ref().map(|p| p.as_ref()) |
48 | } |
49 | |
50 | pub fn start_state(&self) -> S { |
51 | self.repr().start_id |
52 | } |
53 | |
54 | #[inline (always)] |
55 | pub fn overlapping_find_at( |
56 | &self, |
57 | prestate: &mut PrefilterState, |
58 | haystack: &[u8], |
59 | at: usize, |
60 | state_id: &mut S, |
61 | match_index: &mut usize, |
62 | ) -> Option<Match> { |
63 | match *self { |
64 | DFA::Standard(ref dfa) => dfa.overlapping_find_at( |
65 | prestate, |
66 | haystack, |
67 | at, |
68 | state_id, |
69 | match_index, |
70 | ), |
71 | DFA::ByteClass(ref dfa) => dfa.overlapping_find_at( |
72 | prestate, |
73 | haystack, |
74 | at, |
75 | state_id, |
76 | match_index, |
77 | ), |
78 | DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at( |
79 | prestate, |
80 | haystack, |
81 | at, |
82 | state_id, |
83 | match_index, |
84 | ), |
85 | DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at( |
86 | prestate, |
87 | haystack, |
88 | at, |
89 | state_id, |
90 | match_index, |
91 | ), |
92 | } |
93 | } |
94 | |
95 | #[inline (always)] |
96 | pub fn earliest_find_at( |
97 | &self, |
98 | prestate: &mut PrefilterState, |
99 | haystack: &[u8], |
100 | at: usize, |
101 | state_id: &mut S, |
102 | ) -> Option<Match> { |
103 | match *self { |
104 | DFA::Standard(ref dfa) => { |
105 | dfa.earliest_find_at(prestate, haystack, at, state_id) |
106 | } |
107 | DFA::ByteClass(ref dfa) => { |
108 | dfa.earliest_find_at(prestate, haystack, at, state_id) |
109 | } |
110 | DFA::Premultiplied(ref dfa) => { |
111 | dfa.earliest_find_at(prestate, haystack, at, state_id) |
112 | } |
113 | DFA::PremultipliedByteClass(ref dfa) => { |
114 | dfa.earliest_find_at(prestate, haystack, at, state_id) |
115 | } |
116 | } |
117 | } |
118 | |
119 | #[inline (always)] |
120 | pub fn find_at_no_state( |
121 | &self, |
122 | prestate: &mut PrefilterState, |
123 | haystack: &[u8], |
124 | at: usize, |
125 | ) -> Option<Match> { |
126 | match *self { |
127 | DFA::Standard(ref dfa) => { |
128 | dfa.find_at_no_state(prestate, haystack, at) |
129 | } |
130 | DFA::ByteClass(ref dfa) => { |
131 | dfa.find_at_no_state(prestate, haystack, at) |
132 | } |
133 | DFA::Premultiplied(ref dfa) => { |
134 | dfa.find_at_no_state(prestate, haystack, at) |
135 | } |
136 | DFA::PremultipliedByteClass(ref dfa) => { |
137 | dfa.find_at_no_state(prestate, haystack, at) |
138 | } |
139 | } |
140 | } |
141 | } |
142 | |
143 | #[derive (Clone, Debug)] |
144 | pub struct Standard<S>(Repr<S>); |
145 | |
146 | impl<S: StateID> Standard<S> { |
147 | fn repr(&self) -> &Repr<S> { |
148 | &self.0 |
149 | } |
150 | } |
151 | |
152 | impl<S: StateID> Automaton for Standard<S> { |
153 | type ID = S; |
154 | |
155 | fn match_kind(&self) -> &MatchKind { |
156 | &self.repr().match_kind |
157 | } |
158 | |
159 | fn anchored(&self) -> bool { |
160 | self.repr().anchored |
161 | } |
162 | |
163 | fn prefilter(&self) -> Option<&dyn Prefilter> { |
164 | self.repr().prefilter.as_ref().map(|p| p.as_ref()) |
165 | } |
166 | |
167 | fn start_state(&self) -> S { |
168 | self.repr().start_id |
169 | } |
170 | |
171 | fn is_valid(&self, id: S) -> bool { |
172 | id.to_usize() < self.repr().state_count |
173 | } |
174 | |
175 | fn is_match_state(&self, id: S) -> bool { |
176 | self.repr().is_match_state(id) |
177 | } |
178 | |
179 | fn is_match_or_dead_state(&self, id: S) -> bool { |
180 | self.repr().is_match_or_dead_state(id) |
181 | } |
182 | |
183 | fn get_match( |
184 | &self, |
185 | id: S, |
186 | match_index: usize, |
187 | end: usize, |
188 | ) -> Option<Match> { |
189 | self.repr().get_match(id, match_index, end) |
190 | } |
191 | |
192 | fn match_count(&self, id: S) -> usize { |
193 | self.repr().match_count(id) |
194 | } |
195 | |
196 | fn next_state(&self, current: S, input: u8) -> S { |
197 | let o = current.to_usize() * 256 + input as usize; |
198 | self.repr().trans[o] |
199 | } |
200 | } |
201 | |
202 | #[derive (Clone, Debug)] |
203 | pub struct ByteClass<S>(Repr<S>); |
204 | |
205 | impl<S: StateID> ByteClass<S> { |
206 | fn repr(&self) -> &Repr<S> { |
207 | &self.0 |
208 | } |
209 | } |
210 | |
211 | impl<S: StateID> Automaton for ByteClass<S> { |
212 | type ID = S; |
213 | |
214 | fn match_kind(&self) -> &MatchKind { |
215 | &self.repr().match_kind |
216 | } |
217 | |
218 | fn anchored(&self) -> bool { |
219 | self.repr().anchored |
220 | } |
221 | |
222 | fn prefilter(&self) -> Option<&dyn Prefilter> { |
223 | self.repr().prefilter.as_ref().map(|p| p.as_ref()) |
224 | } |
225 | |
226 | fn start_state(&self) -> S { |
227 | self.repr().start_id |
228 | } |
229 | |
230 | fn is_valid(&self, id: S) -> bool { |
231 | id.to_usize() < self.repr().state_count |
232 | } |
233 | |
234 | fn is_match_state(&self, id: S) -> bool { |
235 | self.repr().is_match_state(id) |
236 | } |
237 | |
238 | fn is_match_or_dead_state(&self, id: S) -> bool { |
239 | self.repr().is_match_or_dead_state(id) |
240 | } |
241 | |
242 | fn get_match( |
243 | &self, |
244 | id: S, |
245 | match_index: usize, |
246 | end: usize, |
247 | ) -> Option<Match> { |
248 | self.repr().get_match(id, match_index, end) |
249 | } |
250 | |
251 | fn match_count(&self, id: S) -> usize { |
252 | self.repr().match_count(id) |
253 | } |
254 | |
255 | fn next_state(&self, current: S, input: u8) -> S { |
256 | let alphabet_len = self.repr().byte_classes.alphabet_len(); |
257 | let input = self.repr().byte_classes.get(input); |
258 | let o = current.to_usize() * alphabet_len + input as usize; |
259 | self.repr().trans[o] |
260 | } |
261 | } |
262 | |
263 | #[derive (Clone, Debug)] |
264 | pub struct Premultiplied<S>(Repr<S>); |
265 | |
266 | impl<S: StateID> Premultiplied<S> { |
267 | fn repr(&self) -> &Repr<S> { |
268 | &self.0 |
269 | } |
270 | } |
271 | |
272 | impl<S: StateID> Automaton for Premultiplied<S> { |
273 | type ID = S; |
274 | |
275 | fn match_kind(&self) -> &MatchKind { |
276 | &self.repr().match_kind |
277 | } |
278 | |
279 | fn anchored(&self) -> bool { |
280 | self.repr().anchored |
281 | } |
282 | |
283 | fn prefilter(&self) -> Option<&dyn Prefilter> { |
284 | self.repr().prefilter.as_ref().map(|p| p.as_ref()) |
285 | } |
286 | |
287 | fn start_state(&self) -> S { |
288 | self.repr().start_id |
289 | } |
290 | |
291 | fn is_valid(&self, id: S) -> bool { |
292 | (id.to_usize() / 256) < self.repr().state_count |
293 | } |
294 | |
295 | fn is_match_state(&self, id: S) -> bool { |
296 | self.repr().is_match_state(id) |
297 | } |
298 | |
299 | fn is_match_or_dead_state(&self, id: S) -> bool { |
300 | self.repr().is_match_or_dead_state(id) |
301 | } |
302 | |
303 | fn get_match( |
304 | &self, |
305 | id: S, |
306 | match_index: usize, |
307 | end: usize, |
308 | ) -> Option<Match> { |
309 | if id > self.repr().max_match { |
310 | return None; |
311 | } |
312 | self.repr() |
313 | .matches |
314 | .get(id.to_usize() / 256) |
315 | .and_then(|m| m.get(match_index)) |
316 | .map(|&(id, len)| Match { pattern: id, len, end }) |
317 | } |
318 | |
319 | fn match_count(&self, id: S) -> usize { |
320 | let o = id.to_usize() / 256; |
321 | self.repr().matches[o].len() |
322 | } |
323 | |
324 | fn next_state(&self, current: S, input: u8) -> S { |
325 | let o = current.to_usize() + input as usize; |
326 | self.repr().trans[o] |
327 | } |
328 | } |
329 | |
330 | #[derive (Clone, Debug)] |
331 | pub struct PremultipliedByteClass<S>(Repr<S>); |
332 | |
333 | impl<S: StateID> PremultipliedByteClass<S> { |
334 | fn repr(&self) -> &Repr<S> { |
335 | &self.0 |
336 | } |
337 | } |
338 | |
339 | impl<S: StateID> Automaton for PremultipliedByteClass<S> { |
340 | type ID = S; |
341 | |
342 | fn match_kind(&self) -> &MatchKind { |
343 | &self.repr().match_kind |
344 | } |
345 | |
346 | fn anchored(&self) -> bool { |
347 | self.repr().anchored |
348 | } |
349 | |
350 | fn prefilter(&self) -> Option<&dyn Prefilter> { |
351 | self.repr().prefilter.as_ref().map(|p| p.as_ref()) |
352 | } |
353 | |
354 | fn start_state(&self) -> S { |
355 | self.repr().start_id |
356 | } |
357 | |
358 | fn is_valid(&self, id: S) -> bool { |
359 | (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count |
360 | } |
361 | |
362 | fn is_match_state(&self, id: S) -> bool { |
363 | self.repr().is_match_state(id) |
364 | } |
365 | |
366 | fn is_match_or_dead_state(&self, id: S) -> bool { |
367 | self.repr().is_match_or_dead_state(id) |
368 | } |
369 | |
370 | fn get_match( |
371 | &self, |
372 | id: S, |
373 | match_index: usize, |
374 | end: usize, |
375 | ) -> Option<Match> { |
376 | if id > self.repr().max_match { |
377 | return None; |
378 | } |
379 | self.repr() |
380 | .matches |
381 | .get(id.to_usize() / self.repr().alphabet_len()) |
382 | .and_then(|m| m.get(match_index)) |
383 | .map(|&(id, len)| Match { pattern: id, len, end }) |
384 | } |
385 | |
386 | fn match_count(&self, id: S) -> usize { |
387 | let o = id.to_usize() / self.repr().alphabet_len(); |
388 | self.repr().matches[o].len() |
389 | } |
390 | |
391 | fn next_state(&self, current: S, input: u8) -> S { |
392 | let input = self.repr().byte_classes.get(input); |
393 | let o = current.to_usize() + input as usize; |
394 | self.repr().trans[o] |
395 | } |
396 | } |
397 | |
398 | #[derive (Clone, Debug)] |
399 | pub struct Repr<S> { |
400 | match_kind: MatchKind, |
401 | anchored: bool, |
402 | premultiplied: bool, |
403 | start_id: S, |
404 | /// The length, in bytes, of the longest pattern in this automaton. This |
405 | /// information is useful for keeping correct buffer sizes when searching |
406 | /// on streams. |
407 | max_pattern_len: usize, |
408 | /// The total number of patterns added to this automaton. This includes |
409 | /// patterns that may never match. |
410 | pattern_count: usize, |
411 | state_count: usize, |
412 | max_match: S, |
413 | /// The number of bytes of heap used by this NFA's transition table. |
414 | heap_bytes: usize, |
415 | /// A prefilter for quickly detecting candidate matchs, if pertinent. |
416 | prefilter: Option<PrefilterObj>, |
417 | byte_classes: ByteClasses, |
418 | trans: Vec<S>, |
419 | matches: Vec<Vec<(PatternID, PatternLength)>>, |
420 | } |
421 | |
422 | impl<S: StateID> Repr<S> { |
423 | /// Returns the total alphabet size for this DFA. |
424 | /// |
425 | /// If byte classes are enabled, then this corresponds to the number of |
426 | /// equivalence classes. If they are disabled, then this is always 256. |
427 | fn alphabet_len(&self) -> usize { |
428 | self.byte_classes.alphabet_len() |
429 | } |
430 | |
431 | /// Returns true only if the given state is a match state. |
432 | fn is_match_state(&self, id: S) -> bool { |
433 | id <= self.max_match && id > dead_id() |
434 | } |
435 | |
436 | /// Returns true only if the given state is either a dead state or a match |
437 | /// state. |
438 | fn is_match_or_dead_state(&self, id: S) -> bool { |
439 | id <= self.max_match |
440 | } |
441 | |
442 | /// Get the ith match for the given state, where the end position of a |
443 | /// match was found at `end`. |
444 | /// |
445 | /// # Panics |
446 | /// |
447 | /// The caller must ensure that the given state identifier is valid, |
448 | /// otherwise this may panic. The `match_index` need not be valid. That is, |
449 | /// if the given state has no matches then this returns `None`. |
450 | fn get_match( |
451 | &self, |
452 | id: S, |
453 | match_index: usize, |
454 | end: usize, |
455 | ) -> Option<Match> { |
456 | if id > self.max_match { |
457 | return None; |
458 | } |
459 | self.matches |
460 | .get(id.to_usize()) |
461 | .and_then(|m| m.get(match_index)) |
462 | .map(|&(id, len)| Match { pattern: id, len, end }) |
463 | } |
464 | |
465 | /// Return the total number of matches for the given state. |
466 | /// |
467 | /// # Panics |
468 | /// |
469 | /// The caller must ensure that the given identifier is valid, or else |
470 | /// this panics. |
471 | fn match_count(&self, id: S) -> usize { |
472 | self.matches[id.to_usize()].len() |
473 | } |
474 | |
475 | /// Get the next state given `from` as the current state and `byte` as the |
476 | /// current input byte. |
477 | fn next_state(&self, from: S, byte: u8) -> S { |
478 | let alphabet_len = self.alphabet_len(); |
479 | let byte = self.byte_classes.get(byte); |
480 | self.trans[from.to_usize() * alphabet_len + byte as usize] |
481 | } |
482 | |
483 | /// Set the `byte` transition for the `from` state to point to `to`. |
484 | fn set_next_state(&mut self, from: S, byte: u8, to: S) { |
485 | let alphabet_len = self.alphabet_len(); |
486 | let byte = self.byte_classes.get(byte); |
487 | self.trans[from.to_usize() * alphabet_len + byte as usize] = to; |
488 | } |
489 | |
490 | /// Swap the given states in place. |
491 | fn swap_states(&mut self, id1: S, id2: S) { |
492 | assert!(!self.premultiplied, "can't swap states in premultiplied DFA" ); |
493 | |
494 | let o1 = id1.to_usize() * self.alphabet_len(); |
495 | let o2 = id2.to_usize() * self.alphabet_len(); |
496 | for b in 0..self.alphabet_len() { |
497 | self.trans.swap(o1 + b, o2 + b); |
498 | } |
499 | self.matches.swap(id1.to_usize(), id2.to_usize()); |
500 | } |
501 | |
502 | /// This routine shuffles all match states in this DFA to the beginning |
503 | /// of the DFA such that every non-match state appears after every match |
504 | /// state. (With one exception: the special fail and dead states remain as |
505 | /// the first two states.) |
506 | /// |
507 | /// The purpose of doing this shuffling is to avoid an extra conditional |
508 | /// in the search loop, and in particular, detecting whether a state is a |
509 | /// match or not does not need to access any memory. |
510 | /// |
511 | /// This updates `self.max_match` to point to the last matching state as |
512 | /// well as `self.start` if the starting state was moved. |
513 | fn shuffle_match_states(&mut self) { |
514 | assert!( |
515 | !self.premultiplied, |
516 | "cannot shuffle match states of premultiplied DFA" |
517 | ); |
518 | |
519 | if self.state_count <= 1 { |
520 | return; |
521 | } |
522 | |
523 | let mut first_non_match = self.start_id.to_usize(); |
524 | while first_non_match < self.state_count |
525 | && self.matches[first_non_match].len() > 0 |
526 | { |
527 | first_non_match += 1; |
528 | } |
529 | |
530 | let mut swaps: Vec<S> = vec![fail_id(); self.state_count]; |
531 | let mut cur = self.state_count - 1; |
532 | while cur > first_non_match { |
533 | if self.matches[cur].len() > 0 { |
534 | self.swap_states( |
535 | S::from_usize(cur), |
536 | S::from_usize(first_non_match), |
537 | ); |
538 | swaps[cur] = S::from_usize(first_non_match); |
539 | swaps[first_non_match] = S::from_usize(cur); |
540 | |
541 | first_non_match += 1; |
542 | while first_non_match < cur |
543 | && self.matches[first_non_match].len() > 0 |
544 | { |
545 | first_non_match += 1; |
546 | } |
547 | } |
548 | cur -= 1; |
549 | } |
550 | for id in (0..self.state_count).map(S::from_usize) { |
551 | let alphabet_len = self.alphabet_len(); |
552 | let offset = id.to_usize() * alphabet_len; |
553 | for next in &mut self.trans[offset..offset + alphabet_len] { |
554 | if swaps[next.to_usize()] != fail_id() { |
555 | *next = swaps[next.to_usize()]; |
556 | } |
557 | } |
558 | } |
559 | if swaps[self.start_id.to_usize()] != fail_id() { |
560 | self.start_id = swaps[self.start_id.to_usize()]; |
561 | } |
562 | self.max_match = S::from_usize(first_non_match - 1); |
563 | } |
564 | |
565 | fn premultiply(&mut self) -> Result<()> { |
566 | if self.premultiplied || self.state_count <= 1 { |
567 | return Ok(()); |
568 | } |
569 | |
570 | let alpha_len = self.alphabet_len(); |
571 | premultiply_overflow_error( |
572 | S::from_usize(self.state_count - 1), |
573 | alpha_len, |
574 | )?; |
575 | |
576 | for id in (2..self.state_count).map(S::from_usize) { |
577 | let offset = id.to_usize() * alpha_len; |
578 | for next in &mut self.trans[offset..offset + alpha_len] { |
579 | if *next == dead_id() { |
580 | continue; |
581 | } |
582 | *next = S::from_usize(next.to_usize() * alpha_len); |
583 | } |
584 | } |
585 | self.premultiplied = true; |
586 | self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len); |
587 | self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len); |
588 | Ok(()) |
589 | } |
590 | |
591 | /// Computes the total amount of heap used by this NFA in bytes. |
592 | fn calculate_size(&mut self) { |
593 | let mut size = (self.trans.len() * size_of::<S>()) |
594 | + (self.matches.len() |
595 | * size_of::<Vec<(PatternID, PatternLength)>>()); |
596 | for state_matches in &self.matches { |
597 | size += |
598 | state_matches.len() * size_of::<(PatternID, PatternLength)>(); |
599 | } |
600 | size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()); |
601 | self.heap_bytes = size; |
602 | } |
603 | } |
604 | |
605 | /// A builder for configuring the determinization of an NFA into a DFA. |
606 | #[derive (Clone, Debug)] |
607 | pub struct Builder { |
608 | premultiply: bool, |
609 | byte_classes: bool, |
610 | } |
611 | |
612 | impl Builder { |
613 | /// Create a new builder for a DFA. |
614 | pub fn new() -> Builder { |
615 | Builder { premultiply: true, byte_classes: true } |
616 | } |
617 | |
618 | /// Build a DFA from the given NFA. |
619 | /// |
620 | /// This returns an error if the state identifiers exceed their |
621 | /// representation size. This can only happen when state ids are |
622 | /// premultiplied (which is enabled by default). |
623 | pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> { |
624 | let byte_classes = if self.byte_classes { |
625 | nfa.byte_classes().clone() |
626 | } else { |
627 | ByteClasses::singletons() |
628 | }; |
629 | let alphabet_len = byte_classes.alphabet_len(); |
630 | let trans = vec![fail_id(); alphabet_len * nfa.state_len()]; |
631 | let matches = vec![vec![]; nfa.state_len()]; |
632 | let mut repr = Repr { |
633 | match_kind: nfa.match_kind().clone(), |
634 | anchored: nfa.anchored(), |
635 | premultiplied: false, |
636 | start_id: nfa.start_state(), |
637 | max_pattern_len: nfa.max_pattern_len(), |
638 | pattern_count: nfa.pattern_count(), |
639 | state_count: nfa.state_len(), |
640 | max_match: fail_id(), |
641 | heap_bytes: 0, |
642 | prefilter: nfa.prefilter_obj().map(|p| p.clone()), |
643 | byte_classes: byte_classes.clone(), |
644 | trans, |
645 | matches, |
646 | }; |
647 | for id in (0..nfa.state_len()).map(S::from_usize) { |
648 | repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id)); |
649 | |
650 | let fail = nfa.failure_transition(id); |
651 | nfa.iter_all_transitions(&byte_classes, id, |b, mut next| { |
652 | if next == fail_id() { |
653 | next = nfa_next_state_memoized(nfa, &repr, id, fail, b); |
654 | } |
655 | repr.set_next_state(id, b, next); |
656 | }); |
657 | } |
658 | repr.shuffle_match_states(); |
659 | repr.calculate_size(); |
660 | if self.premultiply { |
661 | repr.premultiply()?; |
662 | if byte_classes.is_singleton() { |
663 | Ok(DFA::Premultiplied(Premultiplied(repr))) |
664 | } else { |
665 | Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr))) |
666 | } |
667 | } else { |
668 | if byte_classes.is_singleton() { |
669 | Ok(DFA::Standard(Standard(repr))) |
670 | } else { |
671 | Ok(DFA::ByteClass(ByteClass(repr))) |
672 | } |
673 | } |
674 | } |
675 | |
676 | /// Whether to use byte classes or in the DFA. |
677 | pub fn byte_classes(&mut self, yes: bool) -> &mut Builder { |
678 | self.byte_classes = yes; |
679 | self |
680 | } |
681 | |
682 | /// Whether to premultiply state identifier in the DFA. |
683 | pub fn premultiply(&mut self, yes: bool) -> &mut Builder { |
684 | self.premultiply = yes; |
685 | self |
686 | } |
687 | } |
688 | |
689 | /// This returns the next NFA transition (including resolving failure |
690 | /// transitions), except once it sees a state id less than the id of the DFA |
691 | /// state that is currently being populated, then we no longer need to follow |
692 | /// failure transitions and can instead query the pre-computed state id from |
693 | /// the DFA itself. |
694 | /// |
695 | /// In general, this should only be called when a failure transition is seen. |
696 | fn nfa_next_state_memoized<S: StateID>( |
697 | nfa: &NFA<S>, |
698 | dfa: &Repr<S>, |
699 | populating: S, |
700 | mut current: S, |
701 | input: u8, |
702 | ) -> S { |
703 | loop { |
704 | if current < populating { |
705 | return dfa.next_state(from:current, byte:input); |
706 | } |
707 | let next: S = nfa.next_state(current, input); |
708 | if next != fail_id() { |
709 | return next; |
710 | } |
711 | current = nfa.failure_transition(id:current); |
712 | } |
713 | } |
714 | |