1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | //! This module contains a single struct [`SmallCharSet`]. See its documentation for details. |
11 | //! |
12 | //! [`SmallCharSet`]: struct.SmallCharSet.html |
13 | |
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// This is stored as a bitmap, with 1 bit for each value: bit `n` of `bits`
/// (counting from the least significant bit) is set exactly when the value
/// `n` is a member of the set.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct SmallCharSet {
    /// Membership bitmap; bit `n` corresponds to the value `n`.
    pub bits: u64,
}

impl SmallCharSet {
    /// Checks whether a character (given as a `u8` value) is stored in the SmallCharSet.
    ///
    /// Only values below 64 can be represented in the bitmap; for any value
    /// of 64 or above this returns `false`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// # use markup5ever::SmallCharSet;
    /// let set = SmallCharSet {
    ///     bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
    /// };
    /// assert!(set.contains(54));
    /// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
    /// assert!(!set.contains(64)); // out of range, so never a member
    /// ```
    #[inline]
    fn contains(&self, n: u8) -> bool {
        // The range check must come first: shifting a `u64` by 64 or more is
        // an overflow, which panics when overflow checks are enabled.
        n < 64 && (self.bits & (1u64 << n)) != 0
    }

    /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
    ///
    /// The scan works on the UTF-8 bytes of `buf` and stops at the first byte
    /// that is a member of the set. Bytes with a value of 64 or above are
    /// never members, so they are always counted. If no byte of `buf` is in
    /// the set, the result is the length of `buf` in bytes.
    ///
    /// This functionality is used in [`BufferQueue::pop_except_from`].
    ///
    /// # Examples
    ///
    /// ```
    /// # #[macro_use] extern crate markup5ever;
    /// # fn main() {
    /// let set = small_char_set!(48 49 50); // '0' '1' '2'
    /// // `test` is 4 bytes, U+1F601 is 4 bytes in UTF-8, then we meet a character in the set
    /// let test_str = "test\u{1F601}01232afd";
    /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
    /// # }
    /// ```
    ///
    /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
    pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
        let mut n = 0;
        for b in buf.bytes() {
            // `contains` already rejects bytes >= 64, so no separate range test is needed here.
            if self.contains(b) {
                break;
            }
            n += 1;
        }
        n
    }
}
70 | |
#[cfg(test)]
mod test {
    use std::iter::repeat;

    /// Builds strings of the form `"x" * x + c + "x" * y` for each member `c`
    /// of the set and checks that the reported prefix length is exactly `x`,
    /// i.e. the scan stops at the first member regardless of what follows it.
    #[test]
    fn nonmember_prefix() {
        // The set does not depend on any loop variable, so build it once.
        let set = small_char_set!('&' '\0');

        for &c in ['&', '\0'].iter() {
            for x in 0..48u32 {
                for y in 0..48u32 {
                    let mut s = repeat("x").take(x as usize).collect::<String>();
                    s.push(c);
                    s.push_str(&repeat("x").take(y as usize).collect::<String>());

                    assert_eq!(x, set.nonmember_prefix_len(&s));
                }
            }
        }
    }
}
91 | |