1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! This module contains a single struct [`SmallCharSet`]. See its documentation for details.
11//!
12//! [`SmallCharSet`]: struct.SmallCharSet.html
13
/// A compact set of "small characters": characters whose Unicode scalar
/// value is strictly less than 64.
///
/// Membership is recorded in a 64-bit bitmap, one bit per possible value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SmallCharSet {
    /// The membership bitmap: bit `n` is set exactly when the character with
    /// scalar value `n` belongs to the set.
    pub bits: u64,
}
22
23impl SmallCharSet {
24 /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
25 ///
26 /// # Examples
27 ///
28 /// ```ignore
29 /// # use markup5ever::SmallCharSet;
30 /// let set = SmallCharSet {
31 /// bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
32 /// };
33 /// assert!(set.contains(64));
34 /// assert!(set.contains(b'6')); // `b'6'` is the same as 64u8
35 /// ```
36 #[inline]
37 fn contains(&self, n: u8) -> bool {
38 0 != (self.bits & (1 << (n as usize)))
39 }
40
41 /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
42 ///
43 /// This functionality is used in [`BufferQueue::pop_except_from`].
44 ///
45 /// # Examples
46 ///
47 /// ```
48 /// # #[macro_use] extern crate markup5ever;
49 /// # fn main() {
50 /// let set = small_char_set!(48 49 50); // '0' '1' '2'
51 /// // `test` is 4 chars, ๐Ÿ˜ is 4 chars, then we meet a character in the set
52 /// let test_str = "test๐Ÿ˜01232afd";
53 /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
54 /// # }
55 /// ```
56 ///
57 /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
58 pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
59 let mut n = 0;
60 for b in buf.bytes() {
61 if b >= 64 || !self.contains(b) {
62 n += 1;
63 } else {
64 break;
65 }
66 }
67 n
68 }
69}
70
#[cfg(test)]
mod test {
    use std::iter::repeat;

    /// Surrounds a single member character with every combination of 0..48
    /// leading and 0..48 trailing `x` bytes, and checks that the reported
    /// prefix length equals the number of leading bytes.
    #[test]
    fn nonmember_prefix() {
        // The set under test never changes, so build it once up front.
        let set = small_char_set!('&' '\0');

        for &member in &['&', '\0'] {
            for lead in 0..48u32 {
                for trail in 0..48u32 {
                    let mut input = String::new();
                    input.extend(repeat('x').take(lead as usize));
                    input.push(member);
                    input.extend(repeat('x').take(trail as usize));

                    assert_eq!(lead, set.nonmember_prefix_len(&input));
                }
            }
        }
    }
}
91