1 | use std::fmt; |
2 | |
3 | use log::warn; |
4 | |
5 | use crate::stream::Stream; |
6 | use crate::Error; |
7 | |
8 | |
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`
    Exists,
    /// `[attr=value]`
    Matches(&'a str),
    /// `[attr~=value]`
    Contains(&'a str),
    /// `[attr|=value]`
    StartsWith(&'a str),
}

impl<'a> AttributeOperator<'a> {
    /// Checks whether an attribute `value` satisfies this operator.
    ///
    /// - `Exists` accepts any value.
    /// - `Matches(v)` requires `value` to be exactly `v`.
    /// - `Contains(v)` treats `value` as a list of words separated by ASCII
    ///   whitespace (space, tab, LF, FF, CR) and requires one word to be
    ///   exactly `v`. An empty `v`, or a `v` that itself contains whitespace,
    ///   never matches.
    /// - `StartsWith(v)` requires `value` to be exactly `v`, or to begin with
    ///   `v` immediately followed by `-`.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            AttributeOperator::Contains(v) => {
                // Splitting on all ASCII whitespace never yields empty words,
                // so repeated separators cannot make an empty `v` match.
                value.split_ascii_whitespace().any(|word| word == v)
            }
            AttributeOperator::StartsWith(v) => {
                // After the `v` prefix there must be either nothing or a `-`.
                // `-` is ASCII, so inspecting the single byte at `v.len()` is
                // enough even when `value` contains multi-byte characters.
                value.starts_with(v)
                    && value.as_bytes().get(v.len()).map_or(true, |&b| b == b'-')
            }
        }
    }
}
48 | |
49 | |
/// A pseudo-class.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum PseudoClass<'a> {
    /// `:first-child`
    FirstChild,
    /// `:link`
    Link,
    /// `:visited`
    Visited,
    /// `:hover`
    Hover,
    /// `:active`
    Active,
    /// `:focus`
    Focus,
    /// `:lang(...)`, carrying the text written between the parentheses.
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class as it appears in a selector, without the
    /// leading `:` (for example `first-child` or `lang(en)`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            PseudoClass::FirstChild => f.write_str("first-child"),
            PseudoClass::Link => f.write_str("link"),
            PseudoClass::Visited => f.write_str("visited"),
            PseudoClass::Hover => f.write_str("hover"),
            PseudoClass::Active => f.write_str("active"),
            PseudoClass::Focus => f.write_str("focus"),
            PseudoClass::Lang(lang) => write!(f, "lang({})", lang),
        }
    }
}
76 | |
77 | |
78 | /// A trait to query an element node metadata. |
79 | pub trait Element: Sized { |
80 | /// Returns a parent element. |
81 | fn parent_element(&self) -> Option<Self>; |
82 | |
83 | /// Returns a previous sibling element. |
84 | fn prev_sibling_element(&self) -> Option<Self>; |
85 | |
86 | /// Checks that the element has a specified local name. |
87 | fn has_local_name(&self, name: &str) -> bool; |
88 | |
89 | /// Checks that the element has a specified attribute. |
90 | fn attribute_matches(&self, local_name: &str, operator: AttributeOperator) -> bool; |
91 | |
92 | /// Checks that the element matches a specified pseudo-class. |
93 | fn pseudo_class_matches(&self, class: PseudoClass) -> bool; |
94 | } |
95 | |
96 | |
/// The leading part of a simple selector: an element name or `*`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum SimpleSelectorType<'a> {
    /// A type selector holding the element name to look for.
    Type(&'a str),
    /// The universal selector; it places no restriction on the element name.
    Universal,
}
102 | |
103 | |
104 | #[derive (Clone, Copy, PartialEq, Debug)] |
105 | enum SubSelector<'a> { |
106 | Attribute(&'a str, AttributeOperator<'a>), |
107 | PseudoClass(PseudoClass<'a>), |
108 | } |
109 | |
110 | |
111 | #[derive (Clone, Debug)] |
112 | struct SimpleSelector<'a> { |
113 | kind: SimpleSelectorType<'a>, |
114 | subselectors: Vec<SubSelector<'a>>, |
115 | } |
116 | |
117 | |
/// The relation between a component and the component before it.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Combinator {
    /// No combinator; used by the first component of a selector.
    None,
    /// `a b`
    Descendant,
    /// `a > b`
    Child,
    /// `a + b`
    AdjacentSibling,
}
125 | |
126 | |
127 | #[derive (Clone, Debug)] |
128 | struct Component<'a> { |
129 | /// A combinator that precede the selector. |
130 | combinator: Combinator, |
131 | selector: SimpleSelector<'a>, |
132 | } |
133 | |
134 | |
135 | /// A selector. |
136 | #[derive (Clone, Debug)] |
137 | pub struct Selector<'a> { |
138 | components: Vec<Component<'a>> |
139 | } |
140 | |
141 | impl<'a> Selector<'a> { |
142 | /// Parses a selector from a string. |
143 | /// |
144 | /// Will log any errors as a warnings. |
145 | /// |
146 | /// Parsing will be stopped at EOF, `,` or `{`. |
147 | pub fn parse(text: &'a str) -> Option<Self> { |
148 | parse(text).0 |
149 | } |
150 | |
151 | /// Compute the selector's specificity. |
152 | /// |
153 | /// Cf. https://www.w3.org/TR/selectors/#specificity. |
154 | pub fn specificity(&self) -> [u8; 3] { |
155 | let mut spec = [0u8; 3]; |
156 | |
157 | for selector in self.components.iter().map(|c| &c.selector) { |
158 | if matches!(selector.kind, SimpleSelectorType::Type(_)) { |
159 | spec[2] = spec[2].saturating_add(1); |
160 | } |
161 | |
162 | for sub in &selector.subselectors { |
163 | match sub { |
164 | SubSelector::Attribute("id" , _) => spec[0] = spec[0].saturating_add(1), |
165 | _ => spec[1] = spec[1].saturating_add(1), |
166 | } |
167 | } |
168 | } |
169 | |
170 | spec |
171 | } |
172 | |
173 | /// Checks that the provided element matches the current selector. |
174 | pub fn matches<E: Element>(&self, element: &E) -> bool { |
175 | assert!(!self.components.is_empty(), "selector must not be empty" ); |
176 | assert_eq!(self.components[0].combinator, Combinator::None, |
177 | "the first component must not have a combinator" ); |
178 | |
179 | self.matches_impl(self.components.len() - 1, element) |
180 | } |
181 | |
182 | fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool { |
183 | let ref component = self.components[idx]; |
184 | |
185 | if !match_selector(&component.selector, element) { |
186 | return false; |
187 | } |
188 | |
189 | match component.combinator { |
190 | Combinator::Descendant => { |
191 | let mut parent = element.parent_element(); |
192 | while let Some(e) = parent { |
193 | if self.matches_impl(idx - 1, &e) { |
194 | return true; |
195 | } |
196 | |
197 | parent = e.parent_element(); |
198 | } |
199 | |
200 | false |
201 | } |
202 | Combinator::Child => { |
203 | if let Some(parent) = element.parent_element() { |
204 | if self.matches_impl(idx - 1, &parent) { |
205 | return true; |
206 | } |
207 | } |
208 | |
209 | false |
210 | } |
211 | Combinator::AdjacentSibling => { |
212 | if let Some(prev) = element.prev_sibling_element() { |
213 | if self.matches_impl(idx - 1, &prev) { |
214 | return true; |
215 | } |
216 | } |
217 | |
218 | false |
219 | } |
220 | Combinator::None => { |
221 | true |
222 | } |
223 | } |
224 | } |
225 | } |
226 | |
227 | fn match_selector<E: Element>(selector: &SimpleSelector, element: &E) -> bool { |
228 | if let SimpleSelectorType::Type(ident: &str) = selector.kind { |
229 | if !element.has_local_name(ident) { |
230 | return false; |
231 | } |
232 | } |
233 | |
234 | for sub: &SubSelector<'_> in &selector.subselectors { |
235 | match sub { |
236 | SubSelector::Attribute(name: &&str, operator: &AttributeOperator<'_>) => { |
237 | if !element.attribute_matches(local_name:name, *operator) { |
238 | return false; |
239 | } |
240 | } |
241 | SubSelector::PseudoClass(class: &PseudoClass<'_>) => { |
242 | if !element.pseudo_class_matches(*class) { |
243 | return false; |
244 | } |
245 | } |
246 | } |
247 | } |
248 | |
249 | true |
250 | } |
251 | |
252 | pub(crate) fn parse(text: &str) -> (Option<Selector>, usize) { |
253 | let mut components: Vec<Component> = Vec::new(); |
254 | let mut combinator = Combinator::None; |
255 | |
256 | let mut tokenizer = SelectorTokenizer::from(text); |
257 | for token in &mut tokenizer { |
258 | let mut add_sub = |sub| { |
259 | if combinator == Combinator::None && !components.is_empty() { |
260 | if let Some(ref mut component) = components.last_mut() { |
261 | component.selector.subselectors.push(sub); |
262 | } |
263 | } else { |
264 | components.push(Component { |
265 | selector: SimpleSelector { |
266 | kind: SimpleSelectorType::Universal, |
267 | subselectors: vec![sub], |
268 | }, |
269 | combinator, |
270 | }); |
271 | |
272 | combinator = Combinator::None; |
273 | } |
274 | }; |
275 | |
276 | let token = match token { |
277 | Ok(t) => t, |
278 | Err(e) => { |
279 | warn!("Selector parsing failed cause {}." , e); |
280 | return (None, tokenizer.stream.pos()); |
281 | } |
282 | }; |
283 | |
284 | match token { |
285 | SelectorToken::UniversalSelector => { |
286 | components.push(Component { |
287 | selector: SimpleSelector { |
288 | kind: SimpleSelectorType::Universal, |
289 | subselectors: Vec::new(), |
290 | }, |
291 | combinator, |
292 | }); |
293 | |
294 | combinator = Combinator::None; |
295 | } |
296 | SelectorToken::TypeSelector(ident) => { |
297 | components.push(Component { |
298 | selector: SimpleSelector { |
299 | kind: SimpleSelectorType::Type(ident), |
300 | subselectors: Vec::new(), |
301 | }, |
302 | combinator, |
303 | }); |
304 | |
305 | combinator = Combinator::None; |
306 | } |
307 | SelectorToken::ClassSelector(ident) => { |
308 | add_sub(SubSelector::Attribute("class" , AttributeOperator::Contains(ident))); |
309 | } |
310 | SelectorToken::IdSelector(id) => { |
311 | add_sub(SubSelector::Attribute("id" , AttributeOperator::Matches(id))); |
312 | } |
313 | SelectorToken::AttributeSelector(name, op) => { |
314 | add_sub(SubSelector::Attribute(name, op)); |
315 | } |
316 | SelectorToken::PseudoClass(ident) => { |
317 | let class = match ident { |
318 | "first-child" => PseudoClass::FirstChild, |
319 | "link" => PseudoClass::Link, |
320 | "visited" => PseudoClass::Visited, |
321 | "hover" => PseudoClass::Hover, |
322 | "active" => PseudoClass::Active, |
323 | "focus" => PseudoClass::Focus, |
324 | _ => { |
325 | warn!("': {}' is not supported. Selector skipped." , ident); |
326 | return (None, tokenizer.stream.pos()); |
327 | } |
328 | }; |
329 | |
330 | // TODO: duplicates |
331 | // TODO: order |
332 | |
333 | add_sub(SubSelector::PseudoClass(class)); |
334 | } |
335 | SelectorToken::LangPseudoClass(lang) => { |
336 | add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang))); |
337 | } |
338 | SelectorToken::DescendantCombinator => { |
339 | combinator = Combinator::Descendant; |
340 | } |
341 | SelectorToken::ChildCombinator => { |
342 | combinator = Combinator::Child; |
343 | } |
344 | SelectorToken::AdjacentCombinator => { |
345 | combinator = Combinator::AdjacentSibling; |
346 | } |
347 | } |
348 | } |
349 | |
350 | if components.is_empty() { |
351 | (None, tokenizer.stream.pos()) |
352 | } else if components[0].combinator != Combinator::None { |
353 | debug_assert_eq!(components[0].combinator, Combinator::None, |
354 | "the first component must not have a combinator" ); |
355 | |
356 | (None, tokenizer.stream.pos()) |
357 | } else { |
358 | (Some(Selector { components }), tokenizer.stream.pos()) |
359 | } |
360 | } |
361 | |
362 | impl<'a> fmt::Display for Selector<'a> { |
363 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
364 | for component in &self.components { |
365 | match component.combinator { |
366 | Combinator::Descendant => write!(f, " " )?, |
367 | Combinator::Child => write!(f, " > " )?, |
368 | Combinator::AdjacentSibling => write!(f, " + " )?, |
369 | Combinator::None => {} |
370 | } |
371 | |
372 | match component.selector.kind { |
373 | SimpleSelectorType::Universal => write!(f, "*" )?, |
374 | SimpleSelectorType::Type(ident) => write!(f, " {}" , ident)?, |
375 | }; |
376 | |
377 | for sel in &component.selector.subselectors { |
378 | match sel { |
379 | SubSelector::Attribute(name, operator) => { |
380 | match operator { |
381 | AttributeOperator::Exists => { |
382 | write!(f, "[ {}]" , name)?; |
383 | } |
384 | AttributeOperator::Matches(value) => { |
385 | write!(f, "[ {}=' {}']" , name, value)?; |
386 | } |
387 | AttributeOperator::Contains(value) => { |
388 | write!(f, "[ {}~=' {}']" , name, value)?; |
389 | } |
390 | AttributeOperator::StartsWith(value) => { |
391 | write!(f, "[ {}|=' {}']" , name, value)?; |
392 | } |
393 | }; |
394 | } |
395 | SubSelector::PseudoClass(class) => write!(f, ": {}" , class)?, |
396 | } |
397 | } |
398 | } |
399 | |
400 | Ok(()) |
401 | } |
402 | } |
403 | |
404 | |
405 | /// A selector token. |
406 | #[derive (Clone, Copy, PartialEq, Debug)] |
407 | pub enum SelectorToken<'a> { |
408 | /// `*` |
409 | UniversalSelector, |
410 | |
411 | /// `div` |
412 | TypeSelector(&'a str), |
413 | |
414 | /// `.class` |
415 | ClassSelector(&'a str), |
416 | |
417 | /// `#id` |
418 | IdSelector(&'a str), |
419 | |
420 | /// `[color=red]` |
421 | AttributeSelector(&'a str, AttributeOperator<'a>), |
422 | |
423 | /// `:first-child` |
424 | PseudoClass(&'a str), |
425 | |
426 | /// `:lang(en)` |
427 | LangPseudoClass(&'a str), |
428 | |
429 | /// `a b` |
430 | DescendantCombinator, |
431 | |
432 | /// `a > b` |
433 | ChildCombinator, |
434 | |
435 | /// `a + b` |
436 | AdjacentCombinator, |
437 | } |
438 | |
439 | |
440 | /// A selector tokenizer. |
441 | /// |
442 | /// # Example |
443 | /// |
444 | /// ``` |
445 | /// use simplecss::{SelectorTokenizer, SelectorToken}; |
446 | /// |
447 | /// let mut t = SelectorTokenizer::from("div > p:first-child" ); |
448 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("div" )); |
449 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::ChildCombinator); |
450 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("p" )); |
451 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::PseudoClass("first-child" )); |
452 | /// assert!(t.next().is_none()); |
453 | /// ``` |
454 | pub struct SelectorTokenizer<'a> { |
455 | stream: Stream<'a>, |
456 | after_combinator: bool, |
457 | finished: bool, |
458 | } |
459 | |
460 | impl<'a> From<&'a str> for SelectorTokenizer<'a> { |
461 | fn from(text: &'a str) -> Self { |
462 | SelectorTokenizer { |
463 | stream: Stream::from(text), |
464 | after_combinator: true, |
465 | finished: false, |
466 | } |
467 | } |
468 | } |
469 | |
470 | impl<'a> Iterator for SelectorTokenizer<'a> { |
471 | type Item = Result<SelectorToken<'a>, Error>; |
472 | |
473 | fn next(&mut self) -> Option<Self::Item> { |
474 | if self.finished || self.stream.at_end() { |
475 | if self.after_combinator { |
476 | self.after_combinator = false; |
477 | return Some(Err(Error::SelectorMissing)); |
478 | } |
479 | |
480 | return None; |
481 | } |
482 | |
483 | macro_rules! try2 { |
484 | ($e:expr) => { |
485 | match $e { |
486 | Ok(v) => v, |
487 | Err(e) => { |
488 | self.finished = true; |
489 | return Some(Err(e)); |
490 | } |
491 | } |
492 | }; |
493 | } |
494 | |
495 | match self.stream.curr_byte_unchecked() { |
496 | b'*' => { |
497 | if !self.after_combinator { |
498 | self.finished = true; |
499 | return Some(Err(Error::UnexpectedSelector)); |
500 | } |
501 | |
502 | self.after_combinator = false; |
503 | self.stream.advance(1); |
504 | Some(Ok(SelectorToken::UniversalSelector)) |
505 | } |
506 | b'#' => { |
507 | self.after_combinator = false; |
508 | self.stream.advance(1); |
509 | let ident = try2!(self.stream.consume_ident()); |
510 | Some(Ok(SelectorToken::IdSelector(ident))) |
511 | } |
512 | b'.' => { |
513 | self.after_combinator = false; |
514 | self.stream.advance(1); |
515 | let ident = try2!(self.stream.consume_ident()); |
516 | Some(Ok(SelectorToken::ClassSelector(ident))) |
517 | } |
518 | b'[' => { |
519 | self.after_combinator = false; |
520 | self.stream.advance(1); |
521 | let ident = try2!(self.stream.consume_ident()); |
522 | |
523 | let op = match try2!(self.stream.curr_byte()) { |
524 | b']' => { |
525 | AttributeOperator::Exists |
526 | } |
527 | b'=' => { |
528 | self.stream.advance(1); |
529 | let value = try2!(self.stream.consume_string()); |
530 | AttributeOperator::Matches(value) |
531 | } |
532 | b'~' => { |
533 | self.stream.advance(1); |
534 | try2!(self.stream.consume_byte(b'=' )); |
535 | let value = try2!(self.stream.consume_string()); |
536 | AttributeOperator::Contains(value) |
537 | } |
538 | b'|' => { |
539 | self.stream.advance(1); |
540 | try2!(self.stream.consume_byte(b'=' )); |
541 | let value = try2!(self.stream.consume_string()); |
542 | AttributeOperator::StartsWith(value) |
543 | } |
544 | _ => { |
545 | self.finished = true; |
546 | return Some(Err(Error::InvalidAttributeSelector)); |
547 | } |
548 | }; |
549 | |
550 | try2!(self.stream.consume_byte(b']' )); |
551 | |
552 | Some(Ok(SelectorToken::AttributeSelector(ident, op))) |
553 | } |
554 | b':' => { |
555 | self.after_combinator = false; |
556 | self.stream.advance(1); |
557 | let ident = try2!(self.stream.consume_ident()); |
558 | |
559 | if ident == "lang" { |
560 | try2!(self.stream.consume_byte(b'(' )); |
561 | let lang = self.stream.consume_bytes(|c| c != b')' ).trim(); |
562 | try2!(self.stream.consume_byte(b')' )); |
563 | |
564 | if lang.is_empty() { |
565 | self.finished = true; |
566 | return Some(Err(Error::InvalidLanguagePseudoClass)); |
567 | } |
568 | |
569 | Some(Ok(SelectorToken::LangPseudoClass(lang))) |
570 | } else { |
571 | Some(Ok(SelectorToken::PseudoClass(ident))) |
572 | } |
573 | } |
574 | b'>' => { |
575 | if self.after_combinator { |
576 | self.after_combinator = false; |
577 | self.finished = true; |
578 | return Some(Err(Error::UnexpectedCombinator)); |
579 | } |
580 | |
581 | self.stream.advance(1); |
582 | self.after_combinator = true; |
583 | Some(Ok(SelectorToken::ChildCombinator)) |
584 | } |
585 | b'+' => { |
586 | if self.after_combinator { |
587 | self.after_combinator = false; |
588 | self.finished = true; |
589 | return Some(Err(Error::UnexpectedCombinator)); |
590 | } |
591 | |
592 | self.stream.advance(1); |
593 | self.after_combinator = true; |
594 | Some(Ok(SelectorToken::AdjacentCombinator)) |
595 | } |
596 | b' ' | b' \t' | b' \n' | b' \r' | b' \x0C' => { |
597 | self.stream.skip_spaces(); |
598 | |
599 | if self.after_combinator { |
600 | return self.next(); |
601 | } |
602 | |
603 | while self.stream.curr_byte() == Ok(b'/' ) { |
604 | try2!(self.stream.skip_comment()); |
605 | self.stream.skip_spaces(); |
606 | } |
607 | |
608 | match self.stream.curr_byte() { |
609 | Ok(b'>' ) | Ok(b'+' ) | Ok(b',' ) | Ok(b'{' ) | Err(_) => { |
610 | self.next() |
611 | } |
612 | _ => { |
613 | if self.after_combinator { |
614 | self.after_combinator = false; |
615 | self.finished = true; |
616 | return Some(Err(Error::UnexpectedSelector)); |
617 | } |
618 | |
619 | self.after_combinator = true; |
620 | Some(Ok(SelectorToken::DescendantCombinator)) |
621 | } |
622 | } |
623 | } |
624 | b'/' => { |
625 | if self.stream.next_byte() == Ok(b'*' ) { |
626 | try2!(self.stream.skip_comment()); |
627 | } else { |
628 | self.finished = true; |
629 | } |
630 | |
631 | self.next() |
632 | } |
633 | b',' | b'{' => { |
634 | self.finished = true; |
635 | self.next() |
636 | } |
637 | _ => { |
638 | let ident = try2!(self.stream.consume_ident()); |
639 | |
640 | if !self.after_combinator { |
641 | self.finished = true; |
642 | return Some(Err(Error::UnexpectedSelector)); |
643 | } |
644 | |
645 | self.after_combinator = false; |
646 | Some(Ok(SelectorToken::TypeSelector(ident))) |
647 | } |
648 | } |
649 | } |
650 | } |
651 | |