| 1 | // Copyright 2014-2017 The html5ever Project Developers. See the | 
| 2 | // COPYRIGHT file at the top-level directory of this distribution. | 
|---|
| 3 | // | 
|---|
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|---|
| 5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|---|
| 6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | 
|---|
| 7 | // option. This file may not be copied, modified, or distributed | 
|---|
| 8 | // except according to those terms. | 
|---|
| 9 |  | 
|---|
| 10 | //! This module contains a single struct [`SmallCharSet`]. See its documentation for details. | 
|---|
| 11 | //! | 
|---|
| 12 | //! [`SmallCharSet`]: struct.SmallCharSet.html | 
|---|
| 13 |  | 
|---|
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// This is stored as a bitmap, with 1 bit for each value: bit `n` of
/// `bits` is set exactly when the character with value `n` is a member.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct SmallCharSet {
    /// Membership bitmap; bit `n` (counting from the least significant bit)
    /// corresponds to the byte value `n`.
    pub bits: u64,
}

impl SmallCharSet {
    /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
    ///
    /// Values of 64 and above cannot be represented in the bitmap, so they are
    /// never members and this returns `false` for them.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// # use markup5ever::SmallCharSet;
    /// let set = SmallCharSet {
    ///     bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
    /// };
    /// assert!(set.contains(54));
    /// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
    /// assert!(!set.contains(64)); // out of range, never a member
    /// ```
    #[inline]
    fn contains(&self, n: u8) -> bool {
        // Guard the shift: `1u64 << n` overflows for `n >= 64` (a panic in
        // debug builds, a wrapped shift amount in release builds).
        n < 64 && 0 != (self.bits & (1u64 << n))
    }

    /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
    ///
    /// The scan works on the UTF-8 bytes of `buf` and stops at the first byte
    /// that is a member of the set; if no byte is a member, the result is the
    /// byte length of `buf`.
    ///
    /// This functionality is used in [`BufferQueue::pop_except_from`].
    ///
    /// # Examples
    ///
    /// ```
    /// # #[macro_use] extern crate markup5ever;
    /// # fn main() {
    /// let set = small_char_set!(48 49 50); // '0' '1' '2'
    /// // `test` is 4 bytes, U+1F617 is 4 bytes in UTF-8, then we meet a character in the set
    /// let test_str = "test\u{1F617}01232afd";
    /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
    /// # }
    /// ```
    ///
    /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
    pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
        let mut n = 0;
        for b in buf.bytes() {
            // `contains` already rejects bytes >= 64, so every byte of a
            // multi-byte UTF-8 sequence counts as a non-member here.
            if self.contains(b) {
                break;
            }
            n += 1;
        }
        n
    }
}
|---|
| 70 |  | 
|---|
#[cfg(test)]
mod test {
    /// Places each member character after `x` non-member bytes and before `y`
    /// non-member bytes, and checks that the reported prefix length is `x`
    /// regardless of what follows the member.
    #[test]
    fn nonmember_prefix() {
        // The set does not depend on the loop variables, so build it once.
        let set = small_char_set!('&' '\0');

        for &c in ['&', '\0'].iter() {
            for x in 0..48u32 {
                for y in 0..48u32 {
                    let mut s = "x".repeat(x as usize);
                    s.push(c);
                    s.push_str(&"x".repeat(y as usize));

                    assert_eq!(x, set.nonmember_prefix_len(&s));
                }
            }
        }
    }
}
|---|
| 89 |  | 
|---|