1 | /*! |
2 | Provides routines for extracting literal prefixes and suffixes from an `Hir`. |
3 | */ |
4 | |
5 | use std::cmp; |
6 | use std::fmt; |
7 | use std::iter; |
8 | use std::mem; |
9 | use std::ops; |
10 | |
11 | use crate::hir::{self, Hir, HirKind}; |
12 | |
13 | /// A set of literal byte strings extracted from a regular expression. |
14 | /// |
15 | /// Every member of the set is a `Literal`, which is represented by a |
16 | /// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is |
17 | /// said to be either *complete* or *cut*. A complete literal means that |
18 | /// it extends until the beginning (or end) of the regular expression. In |
19 | /// some circumstances, this can be used to indicate a match in the regular |
20 | /// expression. |
21 | /// |
22 | /// A key aspect of literal extraction is knowing when to stop. It is not |
23 | /// feasible to blindly extract all literals from a regular expression, even if |
24 | /// there are finitely many. For example, the regular expression `[0-9]{10}` |
25 | /// has `10^10` distinct literals. For this reason, literal extraction is |
26 | /// bounded to some low number by default using heuristics, but the limits can |
27 | /// be tweaked. |
28 | /// |
29 | /// **WARNING**: Literal extraction uses stack space proportional to the size |
30 | /// of the `Hir` expression. At some point, this drawback will be eliminated. |
31 | /// To protect yourself, set a reasonable |
32 | /// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit). |
33 | /// This is done for you by default. |
34 | #[derive (Clone, Eq, PartialEq)] |
35 | pub struct Literals { |
36 | lits: Vec<Literal>, |
37 | limit_size: usize, |
38 | limit_class: usize, |
39 | } |
40 | |
41 | /// A single member of a set of literals extracted from a regular expression. |
42 | /// |
43 | /// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice |
44 | /// and `Vec` operations are available. |
45 | #[derive (Clone, Eq, Ord)] |
46 | pub struct Literal { |
47 | v: Vec<u8>, |
48 | cut: bool, |
49 | } |
50 | |
51 | impl Literals { |
52 | /// Returns a new empty set of literals using default limits. |
53 | pub fn empty() -> Literals { |
54 | Literals { lits: vec![], limit_size: 250, limit_class: 10 } |
55 | } |
56 | |
57 | /// Returns a set of literal prefixes extracted from the given `Hir`. |
58 | pub fn prefixes(expr: &Hir) -> Literals { |
59 | let mut lits = Literals::empty(); |
60 | lits.union_prefixes(expr); |
61 | lits |
62 | } |
63 | |
64 | /// Returns a set of literal suffixes extracted from the given `Hir`. |
65 | pub fn suffixes(expr: &Hir) -> Literals { |
66 | let mut lits = Literals::empty(); |
67 | lits.union_suffixes(expr); |
68 | lits |
69 | } |
70 | |
71 | /// Get the approximate size limit (in bytes) of this set. |
72 | pub fn limit_size(&self) -> usize { |
73 | self.limit_size |
74 | } |
75 | |
76 | /// Set the approximate size limit (in bytes) of this set. |
77 | /// |
78 | /// If extracting a literal would put the set over this limit, then |
79 | /// extraction stops. |
80 | /// |
81 | /// The new limits will only apply to additions to this set. Existing |
82 | /// members remain unchanged, even if the set exceeds the new limit. |
83 | pub fn set_limit_size(&mut self, size: usize) -> &mut Literals { |
84 | self.limit_size = size; |
85 | self |
86 | } |
87 | |
88 | /// Get the character class size limit for this set. |
89 | pub fn limit_class(&self) -> usize { |
90 | self.limit_class |
91 | } |
92 | |
93 | /// Limits the size of character(or byte) classes considered. |
94 | /// |
95 | /// A value of `0` prevents all character classes from being considered. |
96 | /// |
97 | /// This limit also applies to case insensitive literals, since each |
98 | /// character in the case insensitive literal is converted to a class, and |
99 | /// then case folded. |
100 | /// |
101 | /// The new limits will only apply to additions to this set. Existing |
102 | /// members remain unchanged, even if the set exceeds the new limit. |
103 | pub fn set_limit_class(&mut self, size: usize) -> &mut Literals { |
104 | self.limit_class = size; |
105 | self |
106 | } |
107 | |
108 | /// Returns the set of literals as a slice. Its order is unspecified. |
109 | pub fn literals(&self) -> &[Literal] { |
110 | &self.lits |
111 | } |
112 | |
113 | /// Returns the length of the smallest literal. |
114 | /// |
115 | /// Returns None is there are no literals in the set. |
116 | pub fn min_len(&self) -> Option<usize> { |
117 | let mut min = None; |
118 | for lit in &self.lits { |
119 | match min { |
120 | None => min = Some(lit.len()), |
121 | Some(m) if lit.len() < m => min = Some(lit.len()), |
122 | _ => {} |
123 | } |
124 | } |
125 | min |
126 | } |
127 | |
128 | /// Returns true if all members in this set are complete. |
129 | pub fn all_complete(&self) -> bool { |
130 | !self.lits.is_empty() && self.lits.iter().all(|l| !l.is_cut()) |
131 | } |
132 | |
133 | /// Returns true if any member in this set is complete. |
134 | pub fn any_complete(&self) -> bool { |
135 | self.lits.iter().any(|lit| !lit.is_cut()) |
136 | } |
137 | |
138 | /// Returns true if this set contains an empty literal. |
139 | pub fn contains_empty(&self) -> bool { |
140 | self.lits.iter().any(|lit| lit.is_empty()) |
141 | } |
142 | |
143 | /// Returns true if this set is empty or if all of its members is empty. |
144 | pub fn is_empty(&self) -> bool { |
145 | self.lits.is_empty() || self.lits.iter().all(|lit| lit.is_empty()) |
146 | } |
147 | |
148 | /// Returns a new empty set of literals using this set's limits. |
149 | pub fn to_empty(&self) -> Literals { |
150 | let mut lits = Literals::empty(); |
151 | lits.set_limit_size(self.limit_size).set_limit_class(self.limit_class); |
152 | lits |
153 | } |
154 | |
155 | /// Returns the longest common prefix of all members in this set. |
156 | pub fn longest_common_prefix(&self) -> &[u8] { |
157 | if self.is_empty() { |
158 | return &[]; |
159 | } |
160 | let lit0 = &*self.lits[0]; |
161 | let mut len = lit0.len(); |
162 | for lit in &self.lits[1..] { |
163 | len = cmp::min( |
164 | len, |
165 | lit.iter().zip(lit0).take_while(|&(a, b)| a == b).count(), |
166 | ); |
167 | } |
168 | &self.lits[0][..len] |
169 | } |
170 | |
171 | /// Returns the longest common suffix of all members in this set. |
172 | pub fn longest_common_suffix(&self) -> &[u8] { |
173 | if self.is_empty() { |
174 | return &[]; |
175 | } |
176 | let lit0 = &*self.lits[0]; |
177 | let mut len = lit0.len(); |
178 | for lit in &self.lits[1..] { |
179 | len = cmp::min( |
180 | len, |
181 | lit.iter() |
182 | .rev() |
183 | .zip(lit0.iter().rev()) |
184 | .take_while(|&(a, b)| a == b) |
185 | .count(), |
186 | ); |
187 | } |
188 | &self.lits[0][self.lits[0].len() - len..] |
189 | } |
190 | |
191 | /// Returns a new set of literals with the given number of bytes trimmed |
192 | /// from the suffix of each literal. |
193 | /// |
194 | /// If any literal would be cut out completely by trimming, then None is |
195 | /// returned. |
196 | /// |
197 | /// Any duplicates that are created as a result of this transformation are |
198 | /// removed. |
199 | pub fn trim_suffix(&self, num_bytes: usize) -> Option<Literals> { |
200 | if self.min_len().map(|len| len <= num_bytes).unwrap_or(true) { |
201 | return None; |
202 | } |
203 | let mut new = self.to_empty(); |
204 | for mut lit in self.lits.iter().cloned() { |
205 | let new_len = lit.len() - num_bytes; |
206 | lit.truncate(new_len); |
207 | lit.cut(); |
208 | new.lits.push(lit); |
209 | } |
210 | new.lits.sort(); |
211 | new.lits.dedup(); |
212 | Some(new) |
213 | } |
214 | |
215 | /// Returns a new set of prefixes of this set of literals that are |
216 | /// guaranteed to be unambiguous. |
217 | /// |
218 | /// Any substring match with a member of the set is returned is guaranteed |
219 | /// to never overlap with a substring match of another member of the set |
220 | /// at the same starting position. |
221 | /// |
222 | /// Given any two members of the returned set, neither is a substring of |
223 | /// the other. |
224 | pub fn unambiguous_prefixes(&self) -> Literals { |
225 | if self.lits.is_empty() { |
226 | return self.to_empty(); |
227 | } |
228 | let mut old = self.lits.to_vec(); |
229 | let mut new = self.to_empty(); |
230 | 'OUTER: while let Some(mut candidate) = old.pop() { |
231 | if candidate.is_empty() { |
232 | continue; |
233 | } |
234 | if new.lits.is_empty() { |
235 | new.lits.push(candidate); |
236 | continue; |
237 | } |
238 | for lit2 in &mut new.lits { |
239 | if lit2.is_empty() { |
240 | continue; |
241 | } |
242 | if &candidate == lit2 { |
243 | // If the literal is already in the set, then we can |
244 | // just drop it. But make sure that cut literals are |
245 | // infectious! |
246 | candidate.cut = candidate.cut || lit2.cut; |
247 | lit2.cut = candidate.cut; |
248 | continue 'OUTER; |
249 | } |
250 | if candidate.len() < lit2.len() { |
251 | if let Some(i) = position(&candidate, &lit2) { |
252 | candidate.cut(); |
253 | let mut lit3 = lit2.clone(); |
254 | lit3.truncate(i); |
255 | lit3.cut(); |
256 | old.push(lit3); |
257 | lit2.clear(); |
258 | } |
259 | } else if let Some(i) = position(&lit2, &candidate) { |
260 | lit2.cut(); |
261 | let mut new_candidate = candidate.clone(); |
262 | new_candidate.truncate(i); |
263 | new_candidate.cut(); |
264 | old.push(new_candidate); |
265 | candidate.clear(); |
266 | } |
267 | // Oops, the candidate is already represented in the set. |
268 | if candidate.is_empty() { |
269 | continue 'OUTER; |
270 | } |
271 | } |
272 | new.lits.push(candidate); |
273 | } |
274 | new.lits.retain(|lit| !lit.is_empty()); |
275 | new.lits.sort(); |
276 | new.lits.dedup(); |
277 | new |
278 | } |
279 | |
280 | /// Returns a new set of suffixes of this set of literals that are |
281 | /// guaranteed to be unambiguous. |
282 | /// |
283 | /// Any substring match with a member of the set is returned is guaranteed |
284 | /// to never overlap with a substring match of another member of the set |
285 | /// at the same ending position. |
286 | /// |
287 | /// Given any two members of the returned set, neither is a substring of |
288 | /// the other. |
289 | pub fn unambiguous_suffixes(&self) -> Literals { |
290 | // This is a touch wasteful... |
291 | let mut lits = self.clone(); |
292 | lits.reverse(); |
293 | let mut unamb = lits.unambiguous_prefixes(); |
294 | unamb.reverse(); |
295 | unamb |
296 | } |
297 | |
298 | /// Unions the prefixes from the given expression to this set. |
299 | /// |
300 | /// If prefixes could not be added (for example, this set would exceed its |
301 | /// size limits or the set of prefixes from `expr` includes the empty |
302 | /// string), then false is returned. |
303 | /// |
304 | /// Note that prefix literals extracted from `expr` are said to be complete |
305 | /// if and only if the literal extends from the beginning of `expr` to the |
306 | /// end of `expr`. |
307 | pub fn union_prefixes(&mut self, expr: &Hir) -> bool { |
308 | let mut lits = self.to_empty(); |
309 | prefixes(expr, &mut lits); |
310 | !lits.is_empty() && !lits.contains_empty() && self.union(lits) |
311 | } |
312 | |
313 | /// Unions the suffixes from the given expression to this set. |
314 | /// |
315 | /// If suffixes could not be added (for example, this set would exceed its |
316 | /// size limits or the set of suffixes from `expr` includes the empty |
317 | /// string), then false is returned. |
318 | /// |
319 | /// Note that prefix literals extracted from `expr` are said to be complete |
320 | /// if and only if the literal extends from the end of `expr` to the |
321 | /// beginning of `expr`. |
322 | pub fn union_suffixes(&mut self, expr: &Hir) -> bool { |
323 | let mut lits = self.to_empty(); |
324 | suffixes(expr, &mut lits); |
325 | lits.reverse(); |
326 | !lits.is_empty() && !lits.contains_empty() && self.union(lits) |
327 | } |
328 | |
329 | /// Unions this set with another set. |
330 | /// |
331 | /// If the union would cause the set to exceed its limits, then the union |
332 | /// is skipped and it returns false. Otherwise, if the union succeeds, it |
333 | /// returns true. |
334 | pub fn union(&mut self, lits: Literals) -> bool { |
335 | if self.num_bytes() + lits.num_bytes() > self.limit_size { |
336 | return false; |
337 | } |
338 | if lits.is_empty() { |
339 | self.lits.push(Literal::empty()); |
340 | } else { |
341 | self.lits.extend(lits.lits); |
342 | } |
343 | true |
344 | } |
345 | |
346 | /// Extends this set with another set. |
347 | /// |
348 | /// The set of literals is extended via a cross product. |
349 | /// |
350 | /// If a cross product would cause this set to exceed its limits, then the |
351 | /// cross product is skipped and it returns false. Otherwise, if the cross |
352 | /// product succeeds, it returns true. |
353 | pub fn cross_product(&mut self, lits: &Literals) -> bool { |
354 | if lits.is_empty() { |
355 | return true; |
356 | } |
357 | // Check that we make sure we stay in our limits. |
358 | let mut size_after; |
359 | if self.is_empty() || !self.any_complete() { |
360 | size_after = self.num_bytes(); |
361 | for lits_lit in lits.literals() { |
362 | size_after += lits_lit.len(); |
363 | } |
364 | } else { |
365 | size_after = self.lits.iter().fold(0, |accum, lit| { |
366 | accum + if lit.is_cut() { lit.len() } else { 0 } |
367 | }); |
368 | for lits_lit in lits.literals() { |
369 | for self_lit in self.literals() { |
370 | if !self_lit.is_cut() { |
371 | size_after += self_lit.len() + lits_lit.len(); |
372 | } |
373 | } |
374 | } |
375 | } |
376 | if size_after > self.limit_size { |
377 | return false; |
378 | } |
379 | |
380 | let mut base = self.remove_complete(); |
381 | if base.is_empty() { |
382 | base = vec![Literal::empty()]; |
383 | } |
384 | for lits_lit in lits.literals() { |
385 | for mut self_lit in base.clone() { |
386 | self_lit.extend(&**lits_lit); |
387 | self_lit.cut = lits_lit.cut; |
388 | self.lits.push(self_lit); |
389 | } |
390 | } |
391 | true |
392 | } |
393 | |
394 | /// Extends each literal in this set with the bytes given. |
395 | /// |
396 | /// If the set is empty, then the given literal is added to the set. |
397 | /// |
398 | /// If adding any number of bytes to all members of this set causes a limit |
399 | /// to be exceeded, then no bytes are added and false is returned. If a |
400 | /// prefix of `bytes` can be fit into this set, then it is used and all |
401 | /// resulting literals are cut. |
402 | pub fn cross_add(&mut self, bytes: &[u8]) -> bool { |
403 | // N.B. This could be implemented by simply calling cross_product with |
404 | // a literal set containing just `bytes`, but we can be smarter about |
405 | // taking shorter prefixes of `bytes` if they'll fit. |
406 | if bytes.is_empty() { |
407 | return true; |
408 | } |
409 | if self.lits.is_empty() { |
410 | let i = cmp::min(self.limit_size, bytes.len()); |
411 | self.lits.push(Literal::new(bytes[..i].to_owned())); |
412 | self.lits[0].cut = i < bytes.len(); |
413 | return !self.lits[0].is_cut(); |
414 | } |
415 | let size = self.num_bytes(); |
416 | if size + self.lits.len() >= self.limit_size { |
417 | return false; |
418 | } |
419 | let mut i = 1; |
420 | while size + (i * self.lits.len()) <= self.limit_size |
421 | && i < bytes.len() |
422 | { |
423 | i += 1; |
424 | } |
425 | for lit in &mut self.lits { |
426 | if !lit.is_cut() { |
427 | lit.extend(&bytes[..i]); |
428 | if i < bytes.len() { |
429 | lit.cut(); |
430 | } |
431 | } |
432 | } |
433 | true |
434 | } |
435 | |
436 | /// Adds the given literal to this set. |
437 | /// |
438 | /// Returns false if adding this literal would cause the class to be too |
439 | /// big. |
440 | pub fn add(&mut self, lit: Literal) -> bool { |
441 | if self.num_bytes() + lit.len() > self.limit_size { |
442 | return false; |
443 | } |
444 | self.lits.push(lit); |
445 | true |
446 | } |
447 | |
448 | /// Extends each literal in this set with the character class given. |
449 | /// |
450 | /// Returns false if the character class was too big to add. |
451 | pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool { |
452 | self._add_char_class(cls, false) |
453 | } |
454 | |
455 | /// Extends each literal in this set with the character class given, |
456 | /// writing the bytes of each character in reverse. |
457 | /// |
458 | /// Returns false if the character class was too big to add. |
459 | fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool { |
460 | self._add_char_class(cls, true) |
461 | } |
462 | |
463 | fn _add_char_class( |
464 | &mut self, |
465 | cls: &hir::ClassUnicode, |
466 | reverse: bool, |
467 | ) -> bool { |
468 | use std::char; |
469 | |
470 | if self.class_exceeds_limits(cls_char_count(cls)) { |
471 | return false; |
472 | } |
473 | let mut base = self.remove_complete(); |
474 | if base.is_empty() { |
475 | base = vec![Literal::empty()]; |
476 | } |
477 | for r in cls.iter() { |
478 | let (s, e) = (r.start as u32, r.end as u32 + 1); |
479 | for c in (s..e).filter_map(char::from_u32) { |
480 | for mut lit in base.clone() { |
481 | let mut bytes = c.to_string().into_bytes(); |
482 | if reverse { |
483 | bytes.reverse(); |
484 | } |
485 | lit.extend(&bytes); |
486 | self.lits.push(lit); |
487 | } |
488 | } |
489 | } |
490 | true |
491 | } |
492 | |
493 | /// Extends each literal in this set with the byte class given. |
494 | /// |
495 | /// Returns false if the byte class was too big to add. |
496 | pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool { |
497 | if self.class_exceeds_limits(cls_byte_count(cls)) { |
498 | return false; |
499 | } |
500 | let mut base = self.remove_complete(); |
501 | if base.is_empty() { |
502 | base = vec![Literal::empty()]; |
503 | } |
504 | for r in cls.iter() { |
505 | let (s, e) = (r.start as u32, r.end as u32 + 1); |
506 | for b in (s..e).map(|b| b as u8) { |
507 | for mut lit in base.clone() { |
508 | lit.push(b); |
509 | self.lits.push(lit); |
510 | } |
511 | } |
512 | } |
513 | true |
514 | } |
515 | |
516 | /// Cuts every member of this set. When a member is cut, it can never |
517 | /// be extended. |
518 | pub fn cut(&mut self) { |
519 | for lit in &mut self.lits { |
520 | lit.cut(); |
521 | } |
522 | } |
523 | |
524 | /// Reverses all members in place. |
525 | pub fn reverse(&mut self) { |
526 | for lit in &mut self.lits { |
527 | lit.reverse(); |
528 | } |
529 | } |
530 | |
531 | /// Clears this set of all members. |
532 | pub fn clear(&mut self) { |
533 | self.lits.clear(); |
534 | } |
535 | |
536 | /// Pops all complete literals out of this set. |
537 | fn remove_complete(&mut self) -> Vec<Literal> { |
538 | let mut base = vec![]; |
539 | for lit in mem::replace(&mut self.lits, vec![]) { |
540 | if lit.is_cut() { |
541 | self.lits.push(lit); |
542 | } else { |
543 | base.push(lit); |
544 | } |
545 | } |
546 | base |
547 | } |
548 | |
549 | /// Returns the total number of bytes in this set. |
550 | fn num_bytes(&self) -> usize { |
551 | self.lits.iter().fold(0, |accum, lit| accum + lit.len()) |
552 | } |
553 | |
554 | /// Returns true if a character class with the given size would cause this |
555 | /// set to exceed its limits. |
556 | /// |
557 | /// The size given should correspond to the number of items in the class. |
558 | fn class_exceeds_limits(&self, size: usize) -> bool { |
559 | if size > self.limit_class { |
560 | return true; |
561 | } |
562 | // This is an approximation since codepoints in a char class can encode |
563 | // to 1-4 bytes. |
564 | let new_byte_count = if self.lits.is_empty() { |
565 | size |
566 | } else { |
567 | self.lits.iter().fold(0, |accum, lit| { |
568 | accum |
569 | + if lit.is_cut() { |
570 | // If the literal is cut, then we'll never add |
571 | // anything to it, so don't count it. |
572 | 0 |
573 | } else { |
574 | (lit.len() + 1) * size |
575 | } |
576 | }) |
577 | }; |
578 | new_byte_count > self.limit_size |
579 | } |
580 | } |
581 | |
582 | fn prefixes(expr: &Hir, lits: &mut Literals) { |
583 | match *expr.kind() { |
584 | HirKind::Literal(hir::Literal::Unicode(c)) => { |
585 | let mut buf = [0; 4]; |
586 | lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); |
587 | } |
588 | HirKind::Literal(hir::Literal::Byte(b)) => { |
589 | lits.cross_add(&[b]); |
590 | } |
591 | HirKind::Class(hir::Class::Unicode(ref cls)) => { |
592 | if !lits.add_char_class(cls) { |
593 | lits.cut(); |
594 | } |
595 | } |
596 | HirKind::Class(hir::Class::Bytes(ref cls)) => { |
597 | if !lits.add_byte_class(cls) { |
598 | lits.cut(); |
599 | } |
600 | } |
601 | HirKind::Group(hir::Group { ref hir, .. }) => { |
602 | prefixes(&**hir, lits); |
603 | } |
604 | HirKind::Repetition(ref x) => match x.kind { |
605 | hir::RepetitionKind::ZeroOrOne => { |
606 | repeat_zero_or_one_literals(&x.hir, lits, prefixes); |
607 | } |
608 | hir::RepetitionKind::ZeroOrMore => { |
609 | repeat_zero_or_more_literals(&x.hir, lits, prefixes); |
610 | } |
611 | hir::RepetitionKind::OneOrMore => { |
612 | repeat_one_or_more_literals(&x.hir, lits, prefixes); |
613 | } |
614 | hir::RepetitionKind::Range(ref rng) => { |
615 | let (min, max) = match *rng { |
616 | hir::RepetitionRange::Exactly(m) => (m, Some(m)), |
617 | hir::RepetitionRange::AtLeast(m) => (m, None), |
618 | hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), |
619 | }; |
620 | repeat_range_literals( |
621 | &x.hir, min, max, x.greedy, lits, prefixes, |
622 | ) |
623 | } |
624 | }, |
625 | HirKind::Concat(ref es) if es.is_empty() => {} |
626 | HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits), |
627 | HirKind::Concat(ref es) => { |
628 | for e in es { |
629 | if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() { |
630 | if !lits.is_empty() { |
631 | lits.cut(); |
632 | break; |
633 | } |
634 | lits.add(Literal::empty()); |
635 | continue; |
636 | } |
637 | let mut lits2 = lits.to_empty(); |
638 | prefixes(e, &mut lits2); |
639 | if !lits.cross_product(&lits2) || !lits2.any_complete() { |
640 | // If this expression couldn't yield any literal that |
641 | // could be extended, then we need to quit. Since we're |
642 | // short-circuiting, we also need to freeze every member. |
643 | lits.cut(); |
644 | break; |
645 | } |
646 | } |
647 | } |
648 | HirKind::Alternation(ref es) => { |
649 | alternate_literals(es, lits, prefixes); |
650 | } |
651 | _ => lits.cut(), |
652 | } |
653 | } |
654 | |
655 | fn suffixes(expr: &Hir, lits: &mut Literals) { |
656 | match *expr.kind() { |
657 | HirKind::Literal(hir::Literal::Unicode(c)) => { |
658 | let mut buf = [0u8; 4]; |
659 | let i = c.encode_utf8(&mut buf).len(); |
660 | let buf = &mut buf[..i]; |
661 | buf.reverse(); |
662 | lits.cross_add(buf); |
663 | } |
664 | HirKind::Literal(hir::Literal::Byte(b)) => { |
665 | lits.cross_add(&[b]); |
666 | } |
667 | HirKind::Class(hir::Class::Unicode(ref cls)) => { |
668 | if !lits.add_char_class_reverse(cls) { |
669 | lits.cut(); |
670 | } |
671 | } |
672 | HirKind::Class(hir::Class::Bytes(ref cls)) => { |
673 | if !lits.add_byte_class(cls) { |
674 | lits.cut(); |
675 | } |
676 | } |
677 | HirKind::Group(hir::Group { ref hir, .. }) => { |
678 | suffixes(&**hir, lits); |
679 | } |
680 | HirKind::Repetition(ref x) => match x.kind { |
681 | hir::RepetitionKind::ZeroOrOne => { |
682 | repeat_zero_or_one_literals(&x.hir, lits, suffixes); |
683 | } |
684 | hir::RepetitionKind::ZeroOrMore => { |
685 | repeat_zero_or_more_literals(&x.hir, lits, suffixes); |
686 | } |
687 | hir::RepetitionKind::OneOrMore => { |
688 | repeat_one_or_more_literals(&x.hir, lits, suffixes); |
689 | } |
690 | hir::RepetitionKind::Range(ref rng) => { |
691 | let (min, max) = match *rng { |
692 | hir::RepetitionRange::Exactly(m) => (m, Some(m)), |
693 | hir::RepetitionRange::AtLeast(m) => (m, None), |
694 | hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), |
695 | }; |
696 | repeat_range_literals( |
697 | &x.hir, min, max, x.greedy, lits, suffixes, |
698 | ) |
699 | } |
700 | }, |
701 | HirKind::Concat(ref es) if es.is_empty() => {} |
702 | HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits), |
703 | HirKind::Concat(ref es) => { |
704 | for e in es.iter().rev() { |
705 | if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() { |
706 | if !lits.is_empty() { |
707 | lits.cut(); |
708 | break; |
709 | } |
710 | lits.add(Literal::empty()); |
711 | continue; |
712 | } |
713 | let mut lits2 = lits.to_empty(); |
714 | suffixes(e, &mut lits2); |
715 | if !lits.cross_product(&lits2) || !lits2.any_complete() { |
716 | // If this expression couldn't yield any literal that |
717 | // could be extended, then we need to quit. Since we're |
718 | // short-circuiting, we also need to freeze every member. |
719 | lits.cut(); |
720 | break; |
721 | } |
722 | } |
723 | } |
724 | HirKind::Alternation(ref es) => { |
725 | alternate_literals(es, lits, suffixes); |
726 | } |
727 | _ => lits.cut(), |
728 | } |
729 | } |
730 | |
731 | fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>( |
732 | e: &Hir, |
733 | lits: &mut Literals, |
734 | mut f: F, |
735 | ) { |
736 | f( |
737 | &Hir::repetition(rep:hir::Repetition { |
738 | kind: hir::RepetitionKind::ZeroOrMore, |
739 | // FIXME: Our literal extraction doesn't care about greediness. |
740 | // Which is partially why we're treating 'e?' as 'e*'. Namely, |
741 | // 'ab??' yields [Complete(ab), Complete(a)], but it should yield |
742 | // [Complete(a), Complete(ab)] because of the non-greediness. |
743 | greedy: true, |
744 | hir: Box::new(e.clone()), |
745 | }), |
746 | lits, |
747 | ); |
748 | } |
749 | |
750 | fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>( |
751 | e: &Hir, |
752 | lits: &mut Literals, |
753 | mut f: F, |
754 | ) { |
755 | let (mut lits2: Literals, mut lits3: Literals) = (lits.clone(), lits.to_empty()); |
756 | lits3.set_limit_size(lits.limit_size() / 2); |
757 | f(e, &mut lits3); |
758 | |
759 | if lits3.is_empty() || !lits2.cross_product(&lits3) { |
760 | lits.cut(); |
761 | return; |
762 | } |
763 | lits2.cut(); |
764 | lits2.add(lit:Literal::empty()); |
765 | if !lits.union(lits:lits2) { |
766 | lits.cut(); |
767 | } |
768 | } |
769 | |
770 | fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>( |
771 | e: &Hir, |
772 | lits: &mut Literals, |
773 | mut f: F, |
774 | ) { |
775 | f(e, lits); |
776 | lits.cut(); |
777 | } |
778 | |
779 | fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>( |
780 | e: &Hir, |
781 | min: u32, |
782 | max: Option<u32>, |
783 | greedy: bool, |
784 | lits: &mut Literals, |
785 | mut f: F, |
786 | ) { |
787 | if min == 0 { |
788 | // This is a bit conservative. If `max` is set, then we could |
789 | // treat this as a finite set of alternations. For now, we |
790 | // just treat it as `e*`. |
791 | f( |
792 | &Hir::repetition(hir::Repetition { |
793 | kind: hir::RepetitionKind::ZeroOrMore, |
794 | greedy, |
795 | hir: Box::new(e.clone()), |
796 | }), |
797 | lits, |
798 | ); |
799 | } else { |
800 | if min > 0 { |
801 | let n = cmp::min(lits.limit_size, min as usize); |
802 | let es = iter::repeat(e.clone()).take(n).collect(); |
803 | f(&Hir::concat(es), lits); |
804 | if n < min as usize || lits.contains_empty() { |
805 | lits.cut(); |
806 | } |
807 | } |
808 | if max.map_or(true, |max| min < max) { |
809 | lits.cut(); |
810 | } |
811 | } |
812 | } |
813 | |
814 | fn alternate_literals<F: FnMut(&Hir, &mut Literals)>( |
815 | es: &[Hir], |
816 | lits: &mut Literals, |
817 | mut f: F, |
818 | ) { |
819 | let mut lits2: Literals = lits.to_empty(); |
820 | for e: &Hir in es { |
821 | let mut lits3: Literals = lits.to_empty(); |
822 | lits3.set_limit_size(lits.limit_size() / 5); |
823 | f(e, &mut lits3); |
824 | if lits3.is_empty() || !lits2.union(lits:lits3) { |
825 | // If we couldn't find suffixes for *any* of the |
826 | // alternates, then the entire alternation has to be thrown |
827 | // away and any existing members must be frozen. Similarly, |
828 | // if the union couldn't complete, stop and freeze. |
829 | lits.cut(); |
830 | return; |
831 | } |
832 | } |
833 | if !lits.cross_product(&lits2) { |
834 | lits.cut(); |
835 | } |
836 | } |
837 | |
838 | impl fmt::Debug for Literals { |
839 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
840 | f&mut DebugStruct<'_, '_>.debug_struct("Literals" ) |
841 | .field("lits" , &self.lits) |
842 | .field("limit_size" , &self.limit_size) |
843 | .field(name:"limit_class" , &self.limit_class) |
844 | .finish() |
845 | } |
846 | } |
847 | |
848 | impl Literal { |
849 | /// Returns a new complete literal with the bytes given. |
850 | pub fn new(bytes: Vec<u8>) -> Literal { |
851 | Literal { v: bytes, cut: false } |
852 | } |
853 | |
854 | /// Returns a new complete empty literal. |
855 | pub fn empty() -> Literal { |
856 | Literal { v: vec![], cut: false } |
857 | } |
858 | |
859 | /// Returns true if this literal was "cut." |
860 | pub fn is_cut(&self) -> bool { |
861 | self.cut |
862 | } |
863 | |
864 | /// Cuts this literal. |
865 | pub fn cut(&mut self) { |
866 | self.cut = true; |
867 | } |
868 | } |
869 | |
870 | impl PartialEq for Literal { |
871 | fn eq(&self, other: &Literal) -> bool { |
872 | self.v == other.v |
873 | } |
874 | } |
875 | |
876 | impl PartialOrd for Literal { |
877 | fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> { |
878 | self.v.partial_cmp(&other.v) |
879 | } |
880 | } |
881 | |
882 | impl fmt::Debug for Literal { |
883 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
884 | if self.is_cut() { |
885 | write!(f, "Cut( {})" , escape_unicode(&self.v)) |
886 | } else { |
887 | write!(f, "Complete( {})" , escape_unicode(&self.v)) |
888 | } |
889 | } |
890 | } |
891 | |
892 | impl AsRef<[u8]> for Literal { |
893 | fn as_ref(&self) -> &[u8] { |
894 | &self.v |
895 | } |
896 | } |
897 | |
898 | impl ops::Deref for Literal { |
899 | type Target = Vec<u8>; |
900 | fn deref(&self) -> &Vec<u8> { |
901 | &self.v |
902 | } |
903 | } |
904 | |
905 | impl ops::DerefMut for Literal { |
906 | fn deref_mut(&mut self) -> &mut Vec<u8> { |
907 | &mut self.v |
908 | } |
909 | } |
910 | |
911 | fn position(needle: &[u8], mut haystack: &[u8]) -> Option<usize> { |
912 | let mut i: usize = 0; |
913 | while haystack.len() >= needle.len() { |
914 | if needle == &haystack[..needle.len()] { |
915 | return Some(i); |
916 | } |
917 | i += 1; |
918 | haystack = &haystack[1..]; |
919 | } |
920 | None |
921 | } |
922 | |
923 | fn escape_unicode(bytes: &[u8]) -> String { |
924 | let show: String = match ::std::str::from_utf8(bytes) { |
925 | Ok(v: &str) => v.to_string(), |
926 | Err(_) => escape_bytes(bytes), |
927 | }; |
928 | let mut space_escaped: String = String::new(); |
929 | for c: char in show.chars() { |
930 | if c.is_whitespace() { |
931 | let escaped: String = if c as u32 <= 0x7F { |
932 | escape_byte(c as u8) |
933 | } else if c as u32 <= 0xFFFF { |
934 | format!(r"\u{{ {:04x}}}" , c as u32) |
935 | } else { |
936 | format!(r"\U {{{:08x}}}" , c as u32) |
937 | }; |
938 | space_escaped.push_str(&escaped); |
939 | } else { |
940 | space_escaped.push(ch:c); |
941 | } |
942 | } |
943 | space_escaped |
944 | } |
945 | |
946 | fn escape_bytes(bytes: &[u8]) -> String { |
947 | let mut s: String = String::new(); |
948 | for &b: u8 in bytes { |
949 | s.push_str(&escape_byte(b)); |
950 | } |
951 | s |
952 | } |
953 | |
954 | fn escape_byte(byte: u8) -> String { |
955 | use std::ascii::escape_default; |
956 | |
957 | let escaped: Vec<u8> = escape_default(byte).collect(); |
958 | String::from_utf8_lossy(&escaped).into_owned() |
959 | } |
960 | |
961 | fn cls_char_count(cls: &hir::ClassUnicode) -> usize { |
962 | cls.iter().map(|&r: ClassUnicodeRange| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>() |
963 | as usize |
964 | } |
965 | |
966 | fn cls_byte_count(cls: &hir::ClassBytes) -> usize { |
967 | cls.iter().map(|&r: ClassBytesRange| 1 + (r.end as u32) - (r.start as u32)).sum::<u32>() |
968 | as usize |
969 | } |
970 | |
971 | #[cfg (test)] |
972 | mod tests { |
973 | use std::fmt; |
974 | |
975 | use super::{escape_bytes, Literal, Literals}; |
976 | use crate::hir::Hir; |
977 | use crate::ParserBuilder; |
978 | |
979 | // To make test failures easier to read. |
980 | #[derive (Debug, Eq, PartialEq)] |
981 | struct Bytes(Vec<ULiteral>); |
982 | #[derive (Debug, Eq, PartialEq)] |
983 | struct Unicode(Vec<ULiteral>); |
984 | |
985 | fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> { |
986 | let mut ulits = vec![]; |
987 | for blit in blits { |
988 | ulits |
989 | .push(ULiteral { v: escape_bytes(&blit), cut: blit.is_cut() }); |
990 | } |
991 | ulits |
992 | } |
993 | |
994 | fn create_lits<I: IntoIterator<Item = Literal>>(it: I) -> Literals { |
995 | Literals { |
996 | lits: it.into_iter().collect(), |
997 | limit_size: 0, |
998 | limit_class: 0, |
999 | } |
1000 | } |
1001 | |
1002 | // Needs to be pub for 1.3? |
1003 | #[derive (Clone, Eq, PartialEq)] |
1004 | pub struct ULiteral { |
1005 | v: String, |
1006 | cut: bool, |
1007 | } |
1008 | |
1009 | impl ULiteral { |
1010 | fn is_cut(&self) -> bool { |
1011 | self.cut |
1012 | } |
1013 | } |
1014 | |
1015 | impl fmt::Debug for ULiteral { |
1016 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
1017 | if self.is_cut() { |
1018 | write!(f, "Cut( {})" , self.v) |
1019 | } else { |
1020 | write!(f, "Complete( {})" , self.v) |
1021 | } |
1022 | } |
1023 | } |
1024 | |
1025 | impl PartialEq<Literal> for ULiteral { |
1026 | fn eq(&self, other: &Literal) -> bool { |
1027 | self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut() |
1028 | } |
1029 | } |
1030 | |
1031 | impl PartialEq<ULiteral> for Literal { |
1032 | fn eq(&self, other: &ULiteral) -> bool { |
1033 | &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut() |
1034 | } |
1035 | } |
1036 | |
1037 | #[allow (non_snake_case)] |
1038 | fn C(s: &'static str) -> ULiteral { |
1039 | ULiteral { v: s.to_owned(), cut: true } |
1040 | } |
1041 | #[allow (non_snake_case)] |
1042 | fn M(s: &'static str) -> ULiteral { |
1043 | ULiteral { v: s.to_owned(), cut: false } |
1044 | } |
1045 | |
1046 | fn prefixes(lits: &mut Literals, expr: &Hir) { |
1047 | lits.union_prefixes(expr); |
1048 | } |
1049 | |
1050 | fn suffixes(lits: &mut Literals, expr: &Hir) { |
1051 | lits.union_suffixes(expr); |
1052 | } |
1053 | |
1054 | macro_rules! assert_lit_eq { |
1055 | ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{ |
1056 | let expected: Vec<ULiteral> = vec![$($expected_lit),*]; |
1057 | let lits = $got_lits; |
1058 | assert_eq!( |
1059 | $which(expected.clone()), |
1060 | $which(escape_lits(lits.literals()))); |
1061 | assert_eq!( |
1062 | !expected.is_empty() && expected.iter().all(|l| !l.is_cut()), |
1063 | lits.all_complete()); |
1064 | assert_eq!( |
1065 | expected.iter().any(|l| !l.is_cut()), |
1066 | lits.any_complete()); |
1067 | }}; |
1068 | } |
1069 | |
1070 | macro_rules! test_lit { |
1071 | ($name:ident, $which:ident, $re:expr) => { |
1072 | test_lit!($name, $which, $re,); |
1073 | }; |
1074 | ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => { |
1075 | #[test] |
1076 | fn $name() { |
1077 | let expr = ParserBuilder::new() |
1078 | .build() |
1079 | .parse($re) |
1080 | .unwrap(); |
1081 | let lits = Literals::$which(&expr); |
1082 | assert_lit_eq!(Unicode, lits, $($lit),*); |
1083 | |
1084 | let expr = ParserBuilder::new() |
1085 | .allow_invalid_utf8(true) |
1086 | .unicode(false) |
1087 | .build() |
1088 | .parse($re) |
1089 | .unwrap(); |
1090 | let lits = Literals::$which(&expr); |
1091 | assert_lit_eq!(Bytes, lits, $($lit),*); |
1092 | } |
1093 | }; |
1094 | } |
1095 | |
1096 | // ************************************************************************ |
1097 | // Tests for prefix literal extraction. |
1098 | // ************************************************************************ |
1099 | |
1100 | // Elementary tests. |
1101 | test_lit!(pfx_one_lit1, prefixes, "a" , M("a" )); |
1102 | test_lit!(pfx_one_lit2, prefixes, "abc" , M("abc" )); |
1103 | test_lit!(pfx_one_lit3, prefixes, "(?u)☃" , M(" \\xe2 \\x98 \\x83" )); |
1104 | #[cfg (feature = "unicode-case" )] |
1105 | test_lit!(pfx_one_lit4, prefixes, "(?ui)☃" , M(" \\xe2 \\x98 \\x83" )); |
1106 | test_lit!(pfx_class1, prefixes, "[1-4]" , M("1" ), M("2" ), M("3" ), M("4" )); |
1107 | test_lit!( |
1108 | pfx_class2, |
1109 | prefixes, |
1110 | "(?u)[☃Ⅰ]" , |
1111 | M(" \\xe2 \\x85 \\xa0" ), |
1112 | M(" \\xe2 \\x98 \\x83" ) |
1113 | ); |
1114 | #[cfg (feature = "unicode-case" )] |
1115 | test_lit!( |
1116 | pfx_class3, |
1117 | prefixes, |
1118 | "(?ui)[☃Ⅰ]" , |
1119 | M(" \\xe2 \\x85 \\xa0" ), |
1120 | M(" \\xe2 \\x85 \\xb0" ), |
1121 | M(" \\xe2 \\x98 \\x83" ) |
1122 | ); |
1123 | test_lit!(pfx_one_lit_casei1, prefixes, "(?i-u)a" , M("A" ), M("a" )); |
1124 | test_lit!( |
1125 | pfx_one_lit_casei2, |
1126 | prefixes, |
1127 | "(?i-u)abc" , |
1128 | M("ABC" ), |
1129 | M("aBC" ), |
1130 | M("AbC" ), |
1131 | M("abC" ), |
1132 | M("ABc" ), |
1133 | M("aBc" ), |
1134 | M("Abc" ), |
1135 | M("abc" ) |
1136 | ); |
1137 | test_lit!(pfx_group1, prefixes, "(a)" , M("a" )); |
1138 | test_lit!(pfx_rep_zero_or_one1, prefixes, "a?" ); |
1139 | test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?" ); |
1140 | test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?" , C("ab" ), M("a" )); |
1141 | // FIXME: This should return [M("a"), M("ab")] because of the non-greedy |
1142 | // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get |
1143 | // a cut literal. |
1144 | test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??" , C("ab" ), M("a" )); |
1145 | test_lit!(pfx_rep_zero_or_more1, prefixes, "a*" ); |
1146 | test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*" ); |
1147 | test_lit!(pfx_rep_one_or_more1, prefixes, "a+" , C("a" )); |
1148 | test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+" , C("abc" )); |
1149 | test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+" , C("a" )); |
1150 | test_lit!(pfx_rep_range1, prefixes, "a{0}" ); |
1151 | test_lit!(pfx_rep_range2, prefixes, "a{0,}" ); |
1152 | test_lit!(pfx_rep_range3, prefixes, "a{0,1}" ); |
1153 | test_lit!(pfx_rep_range4, prefixes, "a{1}" , M("a" )); |
1154 | test_lit!(pfx_rep_range5, prefixes, "a{2}" , M("aa" )); |
1155 | test_lit!(pfx_rep_range6, prefixes, "a{1,2}" , C("a" )); |
1156 | test_lit!(pfx_rep_range7, prefixes, "a{2,3}" , C("aa" )); |
1157 | |
1158 | // Test regexes with concatenations. |
1159 | test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)" , M("ab" )); |
1160 | test_lit!(pfx_cat2, prefixes, "[ab]z" , M("az" ), M("bz" )); |
1161 | test_lit!( |
1162 | pfx_cat3, |
1163 | prefixes, |
1164 | "(?i-u)[ab]z" , |
1165 | M("AZ" ), |
1166 | M("BZ" ), |
1167 | M("aZ" ), |
1168 | M("bZ" ), |
1169 | M("Az" ), |
1170 | M("Bz" ), |
1171 | M("az" ), |
1172 | M("bz" ) |
1173 | ); |
1174 | test_lit!( |
1175 | pfx_cat4, |
1176 | prefixes, |
1177 | "[ab][yz]" , |
1178 | M("ay" ), |
1179 | M("by" ), |
1180 | M("az" ), |
1181 | M("bz" ) |
1182 | ); |
1183 | test_lit!(pfx_cat5, prefixes, "a*b" , C("a" ), M("b" )); |
1184 | test_lit!(pfx_cat6, prefixes, "a*b*c" , C("a" ), C("b" ), M("c" )); |
1185 | test_lit!(pfx_cat7, prefixes, "a*b*c+" , C("a" ), C("b" ), C("c" )); |
1186 | test_lit!(pfx_cat8, prefixes, "a*b+c" , C("a" ), C("b" )); |
1187 | test_lit!(pfx_cat9, prefixes, "a*b+c*" , C("a" ), C("b" )); |
1188 | test_lit!(pfx_cat10, prefixes, "ab*" , C("ab" ), M("a" )); |
1189 | test_lit!(pfx_cat11, prefixes, "ab*c" , C("ab" ), M("ac" )); |
1190 | test_lit!(pfx_cat12, prefixes, "ab+" , C("ab" )); |
1191 | test_lit!(pfx_cat13, prefixes, "ab+c" , C("ab" )); |
1192 | test_lit!(pfx_cat14, prefixes, "a^" , C("a" )); |
1193 | test_lit!(pfx_cat15, prefixes, "$a" ); |
1194 | test_lit!(pfx_cat16, prefixes, r"ab*c" , C("ab" ), M("ac" )); |
1195 | test_lit!(pfx_cat17, prefixes, r"ab+c" , C("ab" )); |
1196 | test_lit!(pfx_cat18, prefixes, r"z*azb" , C("z" ), M("azb" )); |
1197 | test_lit!(pfx_cat19, prefixes, "a.z" , C("a" )); |
1198 | |
1199 | // Test regexes with alternations. |
1200 | test_lit!(pfx_alt1, prefixes, "a|b" , M("a" ), M("b" )); |
1201 | test_lit!(pfx_alt2, prefixes, "[1-3]|b" , M("1" ), M("2" ), M("3" ), M("b" )); |
1202 | test_lit!(pfx_alt3, prefixes, "y(?:a|b)z" , M("yaz" ), M("ybz" )); |
1203 | test_lit!(pfx_alt4, prefixes, "a|b*" ); |
1204 | test_lit!(pfx_alt5, prefixes, "a|b+" , M("a" ), C("b" )); |
1205 | test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)" ); |
1206 | test_lit!( |
1207 | pfx_alt7, |
1208 | prefixes, |
1209 | "(a|b)*c|(a|ab)*c" , |
1210 | C("a" ), |
1211 | C("b" ), |
1212 | M("c" ), |
1213 | C("a" ), |
1214 | C("ab" ), |
1215 | M("c" ) |
1216 | ); |
1217 | test_lit!(pfx_alt8, prefixes, "a*b|c" , C("a" ), M("b" ), M("c" )); |
1218 | |
1219 | // Test regexes with empty assertions. |
1220 | test_lit!(pfx_empty1, prefixes, "^a" , M("a" )); |
1221 | test_lit!(pfx_empty2, prefixes, "a${2}" , C("a" )); |
1222 | test_lit!(pfx_empty3, prefixes, "^abc" , M("abc" )); |
1223 | test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)" , M("abc" ), M("z" )); |
1224 | |
1225 | // Make sure some curious regexes have no prefixes. |
1226 | test_lit!(pfx_nothing1, prefixes, "." ); |
1227 | test_lit!(pfx_nothing2, prefixes, "(?s)." ); |
1228 | test_lit!(pfx_nothing3, prefixes, "^" ); |
1229 | test_lit!(pfx_nothing4, prefixes, "$" ); |
1230 | test_lit!(pfx_nothing6, prefixes, "(?m)$" ); |
1231 | test_lit!(pfx_nothing7, prefixes, r"\b" ); |
1232 | test_lit!(pfx_nothing8, prefixes, r"\B" ); |
1233 | |
1234 | // Test a few regexes that defeat any prefix literal detection. |
1235 | test_lit!(pfx_defeated1, prefixes, ".a" ); |
1236 | test_lit!(pfx_defeated2, prefixes, "(?s).a" ); |
1237 | test_lit!(pfx_defeated3, prefixes, "a*b*c*" ); |
1238 | test_lit!(pfx_defeated4, prefixes, "a|." ); |
1239 | test_lit!(pfx_defeated5, prefixes, ".|a" ); |
1240 | test_lit!(pfx_defeated6, prefixes, "a|^" ); |
1241 | test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))" ); |
1242 | test_lit!(pfx_defeated8, prefixes, "$a" ); |
1243 | test_lit!(pfx_defeated9, prefixes, "(?m)$a" ); |
1244 | test_lit!(pfx_defeated10, prefixes, r"\ba" ); |
1245 | test_lit!(pfx_defeated11, prefixes, r"\Ba" ); |
1246 | test_lit!(pfx_defeated12, prefixes, "^*a" ); |
1247 | test_lit!(pfx_defeated13, prefixes, "^+a" ); |
1248 | |
1249 | test_lit!( |
1250 | pfx_crazy1, |
1251 | prefixes, |
1252 | r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" , |
1253 | C("Mo \\'" ), |
1254 | C("Mu \\'" ), |
1255 | C("Moam" ), |
1256 | C("Muam" ) |
1257 | ); |
1258 | |
1259 | // ************************************************************************ |
1260 | // Tests for quiting prefix literal search. |
1261 | // ************************************************************************ |
1262 | |
1263 | macro_rules! test_exhausted { |
1264 | ($name:ident, $which:ident, $re:expr) => { |
1265 | test_exhausted!($name, $which, $re,); |
1266 | }; |
1267 | ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => { |
1268 | #[test] |
1269 | fn $name() { |
1270 | let expr = ParserBuilder::new() |
1271 | .build() |
1272 | .parse($re) |
1273 | .unwrap(); |
1274 | let mut lits = Literals::empty(); |
1275 | lits.set_limit_size(20).set_limit_class(10); |
1276 | $which(&mut lits, &expr); |
1277 | assert_lit_eq!(Unicode, lits, $($lit),*); |
1278 | |
1279 | let expr = ParserBuilder::new() |
1280 | .allow_invalid_utf8(true) |
1281 | .unicode(false) |
1282 | .build() |
1283 | .parse($re) |
1284 | .unwrap(); |
1285 | let mut lits = Literals::empty(); |
1286 | lits.set_limit_size(20).set_limit_class(10); |
1287 | $which(&mut lits, &expr); |
1288 | assert_lit_eq!(Bytes, lits, $($lit),*); |
1289 | } |
1290 | }; |
1291 | } |
1292 | |
1293 | // These test use a much lower limit than the default so that we can |
1294 | // write test cases of reasonable size. |
1295 | test_exhausted!(pfx_exhausted1, prefixes, "[a-z]" ); |
1296 | test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A" ); |
1297 | test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z" , C("A" )); |
1298 | test_exhausted!( |
1299 | pfx_exhausted4, |
1300 | prefixes, |
1301 | "(?i-u)foobar" , |
1302 | C("FO" ), |
1303 | C("fO" ), |
1304 | C("Fo" ), |
1305 | C("fo" ) |
1306 | ); |
1307 | test_exhausted!( |
1308 | pfx_exhausted5, |
1309 | prefixes, |
1310 | "(?:ab){100}" , |
1311 | C("abababababababababab" ) |
1312 | ); |
1313 | test_exhausted!( |
1314 | pfx_exhausted6, |
1315 | prefixes, |
1316 | "(?:(?:ab){100})*cd" , |
1317 | C("ababababab" ), |
1318 | M("cd" ) |
1319 | ); |
1320 | test_exhausted!( |
1321 | pfx_exhausted7, |
1322 | prefixes, |
1323 | "z(?:(?:ab){100})*cd" , |
1324 | C("zababababab" ), |
1325 | M("zcd" ) |
1326 | ); |
1327 | test_exhausted!( |
1328 | pfx_exhausted8, |
1329 | prefixes, |
1330 | "aaaaaaaaaaaaaaaaaaaaz" , |
1331 | C("aaaaaaaaaaaaaaaaaaaa" ) |
1332 | ); |
1333 | |
1334 | // ************************************************************************ |
1335 | // Tests for suffix literal extraction. |
1336 | // ************************************************************************ |
1337 | |
1338 | // Elementary tests. |
1339 | test_lit!(sfx_one_lit1, suffixes, "a" , M("a" )); |
1340 | test_lit!(sfx_one_lit2, suffixes, "abc" , M("abc" )); |
1341 | test_lit!(sfx_one_lit3, suffixes, "(?u)☃" , M(" \\xe2 \\x98 \\x83" )); |
1342 | #[cfg (feature = "unicode-case" )] |
1343 | test_lit!(sfx_one_lit4, suffixes, "(?ui)☃" , M(" \\xe2 \\x98 \\x83" )); |
1344 | test_lit!(sfx_class1, suffixes, "[1-4]" , M("1" ), M("2" ), M("3" ), M("4" )); |
1345 | test_lit!( |
1346 | sfx_class2, |
1347 | suffixes, |
1348 | "(?u)[☃Ⅰ]" , |
1349 | M(" \\xe2 \\x85 \\xa0" ), |
1350 | M(" \\xe2 \\x98 \\x83" ) |
1351 | ); |
1352 | #[cfg (feature = "unicode-case" )] |
1353 | test_lit!( |
1354 | sfx_class3, |
1355 | suffixes, |
1356 | "(?ui)[☃Ⅰ]" , |
1357 | M(" \\xe2 \\x85 \\xa0" ), |
1358 | M(" \\xe2 \\x85 \\xb0" ), |
1359 | M(" \\xe2 \\x98 \\x83" ) |
1360 | ); |
1361 | test_lit!(sfx_one_lit_casei1, suffixes, "(?i-u)a" , M("A" ), M("a" )); |
1362 | test_lit!( |
1363 | sfx_one_lit_casei2, |
1364 | suffixes, |
1365 | "(?i-u)abc" , |
1366 | M("ABC" ), |
1367 | M("ABc" ), |
1368 | M("AbC" ), |
1369 | M("Abc" ), |
1370 | M("aBC" ), |
1371 | M("aBc" ), |
1372 | M("abC" ), |
1373 | M("abc" ) |
1374 | ); |
1375 | test_lit!(sfx_group1, suffixes, "(a)" , M("a" )); |
1376 | test_lit!(sfx_rep_zero_or_one1, suffixes, "a?" ); |
1377 | test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?" ); |
1378 | test_lit!(sfx_rep_zero_or_more1, suffixes, "a*" ); |
1379 | test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*" ); |
1380 | test_lit!(sfx_rep_one_or_more1, suffixes, "a+" , C("a" )); |
1381 | test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+" , C("abc" )); |
1382 | test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+" , C("a" )); |
1383 | test_lit!(sfx_rep_range1, suffixes, "a{0}" ); |
1384 | test_lit!(sfx_rep_range2, suffixes, "a{0,}" ); |
1385 | test_lit!(sfx_rep_range3, suffixes, "a{0,1}" ); |
1386 | test_lit!(sfx_rep_range4, suffixes, "a{1}" , M("a" )); |
1387 | test_lit!(sfx_rep_range5, suffixes, "a{2}" , M("aa" )); |
1388 | test_lit!(sfx_rep_range6, suffixes, "a{1,2}" , C("a" )); |
1389 | test_lit!(sfx_rep_range7, suffixes, "a{2,3}" , C("aa" )); |
1390 | |
1391 | // Test regexes with concatenations. |
1392 | test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)" , M("ab" )); |
1393 | test_lit!(sfx_cat2, suffixes, "[ab]z" , M("az" ), M("bz" )); |
1394 | test_lit!( |
1395 | sfx_cat3, |
1396 | suffixes, |
1397 | "(?i-u)[ab]z" , |
1398 | M("AZ" ), |
1399 | M("Az" ), |
1400 | M("BZ" ), |
1401 | M("Bz" ), |
1402 | M("aZ" ), |
1403 | M("az" ), |
1404 | M("bZ" ), |
1405 | M("bz" ) |
1406 | ); |
1407 | test_lit!( |
1408 | sfx_cat4, |
1409 | suffixes, |
1410 | "[ab][yz]" , |
1411 | M("ay" ), |
1412 | M("az" ), |
1413 | M("by" ), |
1414 | M("bz" ) |
1415 | ); |
1416 | test_lit!(sfx_cat5, suffixes, "a*b" , C("ab" ), M("b" )); |
1417 | test_lit!(sfx_cat6, suffixes, "a*b*c" , C("bc" ), C("ac" ), M("c" )); |
1418 | test_lit!(sfx_cat7, suffixes, "a*b*c+" , C("c" )); |
1419 | test_lit!(sfx_cat8, suffixes, "a*b+c" , C("bc" )); |
1420 | test_lit!(sfx_cat9, suffixes, "a*b+c*" , C("c" ), C("b" )); |
1421 | test_lit!(sfx_cat10, suffixes, "ab*" , C("b" ), M("a" )); |
1422 | test_lit!(sfx_cat11, suffixes, "ab*c" , C("bc" ), M("ac" )); |
1423 | test_lit!(sfx_cat12, suffixes, "ab+" , C("b" )); |
1424 | test_lit!(sfx_cat13, suffixes, "ab+c" , C("bc" )); |
1425 | test_lit!(sfx_cat14, suffixes, "a^" ); |
1426 | test_lit!(sfx_cat15, suffixes, "$a" , C("a" )); |
1427 | test_lit!(sfx_cat16, suffixes, r"ab*c" , C("bc" ), M("ac" )); |
1428 | test_lit!(sfx_cat17, suffixes, r"ab+c" , C("bc" )); |
1429 | test_lit!(sfx_cat18, suffixes, r"z*azb" , C("zazb" ), M("azb" )); |
1430 | test_lit!(sfx_cat19, suffixes, "a.z" , C("z" )); |
1431 | |
1432 | // Test regexes with alternations. |
1433 | test_lit!(sfx_alt1, suffixes, "a|b" , M("a" ), M("b" )); |
1434 | test_lit!(sfx_alt2, suffixes, "[1-3]|b" , M("1" ), M("2" ), M("3" ), M("b" )); |
1435 | test_lit!(sfx_alt3, suffixes, "y(?:a|b)z" , M("yaz" ), M("ybz" )); |
1436 | test_lit!(sfx_alt4, suffixes, "a|b*" ); |
1437 | test_lit!(sfx_alt5, suffixes, "a|b+" , M("a" ), C("b" )); |
1438 | test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)" ); |
1439 | test_lit!( |
1440 | sfx_alt7, |
1441 | suffixes, |
1442 | "(a|b)*c|(a|ab)*c" , |
1443 | C("ac" ), |
1444 | C("bc" ), |
1445 | M("c" ), |
1446 | C("ac" ), |
1447 | C("abc" ), |
1448 | M("c" ) |
1449 | ); |
1450 | test_lit!(sfx_alt8, suffixes, "a*b|c" , C("ab" ), M("b" ), M("c" )); |
1451 | |
1452 | // Test regexes with empty assertions. |
1453 | test_lit!(sfx_empty1, suffixes, "a$" , M("a" )); |
1454 | test_lit!(sfx_empty2, suffixes, "${2}a" , C("a" )); |
1455 | |
1456 | // Make sure some curious regexes have no suffixes. |
1457 | test_lit!(sfx_nothing1, suffixes, "." ); |
1458 | test_lit!(sfx_nothing2, suffixes, "(?s)." ); |
1459 | test_lit!(sfx_nothing3, suffixes, "^" ); |
1460 | test_lit!(sfx_nothing4, suffixes, "$" ); |
1461 | test_lit!(sfx_nothing6, suffixes, "(?m)$" ); |
1462 | test_lit!(sfx_nothing7, suffixes, r"\b" ); |
1463 | test_lit!(sfx_nothing8, suffixes, r"\B" ); |
1464 | |
1465 | // Test a few regexes that defeat any suffix literal detection. |
1466 | test_lit!(sfx_defeated1, suffixes, "a." ); |
1467 | test_lit!(sfx_defeated2, suffixes, "(?s)a." ); |
1468 | test_lit!(sfx_defeated3, suffixes, "a*b*c*" ); |
1469 | test_lit!(sfx_defeated4, suffixes, "a|." ); |
1470 | test_lit!(sfx_defeated5, suffixes, ".|a" ); |
1471 | test_lit!(sfx_defeated6, suffixes, "a|^" ); |
1472 | test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c))." ); |
1473 | test_lit!(sfx_defeated8, suffixes, "a^" ); |
1474 | test_lit!(sfx_defeated9, suffixes, "(?m)a$" ); |
1475 | test_lit!(sfx_defeated10, suffixes, r"a\b" ); |
1476 | test_lit!(sfx_defeated11, suffixes, r"a\B" ); |
1477 | test_lit!(sfx_defeated12, suffixes, "a^*" ); |
1478 | test_lit!(sfx_defeated13, suffixes, "a^+" ); |
1479 | |
1480 | // These test use a much lower limit than the default so that we can |
1481 | // write test cases of reasonable size. |
1482 | test_exhausted!(sfx_exhausted1, suffixes, "[a-z]" ); |
1483 | test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*" ); |
1484 | test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z" , C("Z" )); |
1485 | test_exhausted!( |
1486 | sfx_exhausted4, |
1487 | suffixes, |
1488 | "(?i-u)foobar" , |
1489 | C("AR" ), |
1490 | C("Ar" ), |
1491 | C("aR" ), |
1492 | C("ar" ) |
1493 | ); |
1494 | test_exhausted!( |
1495 | sfx_exhausted5, |
1496 | suffixes, |
1497 | "(?:ab){100}" , |
1498 | C("abababababababababab" ) |
1499 | ); |
1500 | test_exhausted!( |
1501 | sfx_exhausted6, |
1502 | suffixes, |
1503 | "cd(?:(?:ab){100})*" , |
1504 | C("ababababab" ), |
1505 | M("cd" ) |
1506 | ); |
1507 | test_exhausted!( |
1508 | sfx_exhausted7, |
1509 | suffixes, |
1510 | "cd(?:(?:ab){100})*z" , |
1511 | C("abababababz" ), |
1512 | M("cdz" ) |
1513 | ); |
1514 | test_exhausted!( |
1515 | sfx_exhausted8, |
1516 | suffixes, |
1517 | "zaaaaaaaaaaaaaaaaaaaa" , |
1518 | C("aaaaaaaaaaaaaaaaaaaa" ) |
1519 | ); |
1520 | |
1521 | // ************************************************************************ |
1522 | // Tests for generating unambiguous literal sets. |
1523 | // ************************************************************************ |
1524 | |
1525 | macro_rules! test_unamb { |
1526 | ($name:ident, $given:expr, $expected:expr) => { |
1527 | #[test] |
1528 | fn $name() { |
1529 | let given: Vec<Literal> = $given |
1530 | .into_iter() |
1531 | .map(|ul| { |
1532 | let cut = ul.is_cut(); |
1533 | Literal { v: ul.v.into_bytes(), cut: cut } |
1534 | }) |
1535 | .collect(); |
1536 | let lits = create_lits(given); |
1537 | let got = lits.unambiguous_prefixes(); |
1538 | assert_eq!($expected, escape_lits(got.literals())); |
1539 | } |
1540 | }; |
1541 | } |
1542 | |
1543 | test_unamb!(unambiguous1, vec![M("z" ), M("azb" )], vec![C("a" ), C("z" )]); |
1544 | test_unamb!( |
1545 | unambiguous2, |
1546 | vec![M("zaaaaaa" ), M("aa" )], |
1547 | vec![C("aa" ), C("z" )] |
1548 | ); |
1549 | test_unamb!( |
1550 | unambiguous3, |
1551 | vec![M("Sherlock" ), M("Watson" )], |
1552 | vec![M("Sherlock" ), M("Watson" )] |
1553 | ); |
1554 | test_unamb!(unambiguous4, vec![M("abc" ), M("bc" )], vec![C("a" ), C("bc" )]); |
1555 | test_unamb!(unambiguous5, vec![M("bc" ), M("abc" )], vec![C("a" ), C("bc" )]); |
1556 | test_unamb!(unambiguous6, vec![M("a" ), M("aa" )], vec![C("a" )]); |
1557 | test_unamb!(unambiguous7, vec![M("aa" ), M("a" )], vec![C("a" )]); |
1558 | test_unamb!(unambiguous8, vec![M("ab" ), M("a" )], vec![C("a" )]); |
1559 | test_unamb!( |
1560 | unambiguous9, |
1561 | vec![M("ac" ), M("bc" ), M("c" ), M("ac" ), M("abc" ), M("c" )], |
1562 | vec![C("a" ), C("b" ), C("c" )] |
1563 | ); |
1564 | test_unamb!( |
1565 | unambiguous10, |
1566 | vec![M("Mo'" ), M("Mu'" ), M("Mo" ), M("Mu" )], |
1567 | vec![C("Mo" ), C("Mu" )] |
1568 | ); |
1569 | test_unamb!( |
1570 | unambiguous11, |
1571 | vec![M("zazb" ), M("azb" )], |
1572 | vec![C("a" ), C("z" )] |
1573 | ); |
1574 | test_unamb!(unambiguous12, vec![M("foo" ), C("foo" )], vec![C("foo" )]); |
1575 | test_unamb!( |
1576 | unambiguous13, |
1577 | vec![M("ABCX" ), M("CDAX" ), M("BCX" )], |
1578 | vec![C("A" ), C("BCX" ), C("CD" )] |
1579 | ); |
1580 | test_unamb!( |
1581 | unambiguous14, |
1582 | vec![M("IMGX" ), M("MVIX" ), M("MGX" ), M("DSX" )], |
1583 | vec![M("DSX" ), C("I" ), C("MGX" ), C("MV" )] |
1584 | ); |
1585 | test_unamb!( |
1586 | unambiguous15, |
1587 | vec![M("IMG_" ), M("MG_" ), M("CIMG" )], |
1588 | vec![C("C" ), C("I" ), C("MG_" )] |
1589 | ); |
1590 | |
1591 | // ************************************************************************ |
1592 | // Tests for suffix trimming. |
1593 | // ************************************************************************ |
1594 | macro_rules! test_trim { |
1595 | ($name:ident, $trim:expr, $given:expr, $expected:expr) => { |
1596 | #[test] |
1597 | fn $name() { |
1598 | let given: Vec<Literal> = $given |
1599 | .into_iter() |
1600 | .map(|ul| { |
1601 | let cut = ul.is_cut(); |
1602 | Literal { v: ul.v.into_bytes(), cut: cut } |
1603 | }) |
1604 | .collect(); |
1605 | let lits = create_lits(given); |
1606 | let got = lits.trim_suffix($trim).unwrap(); |
1607 | assert_eq!($expected, escape_lits(got.literals())); |
1608 | } |
1609 | }; |
1610 | } |
1611 | |
1612 | test_trim!(trim1, 1, vec![M("ab" ), M("yz" )], vec![C("a" ), C("y" )]); |
1613 | test_trim!(trim2, 1, vec![M("abc" ), M("abd" )], vec![C("ab" )]); |
1614 | test_trim!(trim3, 2, vec![M("abc" ), M("abd" )], vec![C("a" )]); |
1615 | test_trim!(trim4, 2, vec![M("abc" ), M("ghij" )], vec![C("a" ), C("gh" )]); |
1616 | |
1617 | // ************************************************************************ |
1618 | // Tests for longest common prefix. |
1619 | // ************************************************************************ |
1620 | |
1621 | macro_rules! test_lcp { |
1622 | ($name:ident, $given:expr, $expected:expr) => { |
1623 | #[test] |
1624 | fn $name() { |
1625 | let given: Vec<Literal> = $given |
1626 | .into_iter() |
1627 | .map(|s: &str| Literal { |
1628 | v: s.to_owned().into_bytes(), |
1629 | cut: false, |
1630 | }) |
1631 | .collect(); |
1632 | let lits = create_lits(given); |
1633 | let got = lits.longest_common_prefix(); |
1634 | assert_eq!($expected, escape_bytes(got)); |
1635 | } |
1636 | }; |
1637 | } |
1638 | |
1639 | test_lcp!(lcp1, vec!["a" ], "a" ); |
1640 | test_lcp!(lcp2, vec![], "" ); |
1641 | test_lcp!(lcp3, vec!["a" , "b" ], "" ); |
1642 | test_lcp!(lcp4, vec!["ab" , "ab" ], "ab" ); |
1643 | test_lcp!(lcp5, vec!["ab" , "a" ], "a" ); |
1644 | test_lcp!(lcp6, vec!["a" , "ab" ], "a" ); |
1645 | test_lcp!(lcp7, vec!["ab" , "b" ], "" ); |
1646 | test_lcp!(lcp8, vec!["b" , "ab" ], "" ); |
1647 | test_lcp!(lcp9, vec!["foobar" , "foobaz" ], "fooba" ); |
1648 | test_lcp!(lcp10, vec!["foobar" , "foobaz" , "a" ], "" ); |
1649 | test_lcp!(lcp11, vec!["a" , "foobar" , "foobaz" ], "" ); |
1650 | test_lcp!(lcp12, vec!["foo" , "flub" , "flab" , "floo" ], "f" ); |
1651 | |
1652 | // ************************************************************************ |
1653 | // Tests for longest common suffix. |
1654 | // ************************************************************************ |
1655 | |
1656 | macro_rules! test_lcs { |
1657 | ($name:ident, $given:expr, $expected:expr) => { |
1658 | #[test] |
1659 | fn $name() { |
1660 | let given: Vec<Literal> = $given |
1661 | .into_iter() |
1662 | .map(|s: &str| Literal { |
1663 | v: s.to_owned().into_bytes(), |
1664 | cut: false, |
1665 | }) |
1666 | .collect(); |
1667 | let lits = create_lits(given); |
1668 | let got = lits.longest_common_suffix(); |
1669 | assert_eq!($expected, escape_bytes(got)); |
1670 | } |
1671 | }; |
1672 | } |
1673 | |
1674 | test_lcs!(lcs1, vec!["a" ], "a" ); |
1675 | test_lcs!(lcs2, vec![], "" ); |
1676 | test_lcs!(lcs3, vec!["a" , "b" ], "" ); |
1677 | test_lcs!(lcs4, vec!["ab" , "ab" ], "ab" ); |
1678 | test_lcs!(lcs5, vec!["ab" , "a" ], "" ); |
1679 | test_lcs!(lcs6, vec!["a" , "ab" ], "" ); |
1680 | test_lcs!(lcs7, vec!["ab" , "b" ], "b" ); |
1681 | test_lcs!(lcs8, vec!["b" , "ab" ], "b" ); |
1682 | test_lcs!(lcs9, vec!["barfoo" , "bazfoo" ], "foo" ); |
1683 | test_lcs!(lcs10, vec!["barfoo" , "bazfoo" , "a" ], "" ); |
1684 | test_lcs!(lcs11, vec!["a" , "barfoo" , "bazfoo" ], "" ); |
1685 | test_lcs!(lcs12, vec!["flub" , "bub" , "boob" , "dub" ], "b" ); |
1686 | } |
1687 | |