1 | // Copyright 2019 the SimpleCSS Authors |
2 | // SPDX-License-Identifier: Apache-2.0 OR MIT |
3 | |
4 | use alloc::{vec, vec::Vec}; |
5 | use core::fmt; |
6 | |
7 | use log::warn; |
8 | |
9 | use crate::stream::Stream; |
10 | use crate::Error; |
11 | |
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`
    Exists,
    /// `[attr=value]`
    Matches(&'a str),
    /// `[attr~=value]`
    Contains(&'a str),
    /// `[attr|=value]`
    StartsWith(&'a str),
}

impl AttributeOperator<'_> {
    /// Checks whether an attribute `value` satisfies this operator.
    ///
    /// - `Exists` accepts any value.
    /// - `Matches(v)` requires `value` to be exactly `v`.
    /// - `Contains(v)` treats `value` as a whitespace-separated list of words
    ///   and requires one of them to be exactly `v`.
    /// - `StartsWith(v)` requires `value` to be exactly `v`, or to begin with
    ///   `v` immediately followed by `-`.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            // Words may be separated by any CSS whitespace (space, tab, LF, CR, FF),
            // not only U+0020. Splitting on whitespace also means that an empty `v`
            // never matches, because no word is ever empty.
            AttributeOperator::Contains(v) => {
                value.split_ascii_whitespace().any(|word| word == v)
            }
            AttributeOperator::StartsWith(v) => match value.strip_prefix(v) {
                // Nothing left: exact match. Otherwise the remainder must start with `-`.
                Some(rest) => rest.is_empty() || rest.starts_with('-'),
                None => false,
            },
        }
    }
}
45 | |
/// A pseudo-class.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum PseudoClass<'a> {
    /// `:first-child`
    FirstChild,
    /// `:link`
    Link,
    /// `:visited`
    Visited,
    /// `:hover`
    Hover,
    /// `:active`
    Active,
    /// `:focus`
    Focus,
    /// `:lang(en)`; holds the text written between the parentheses.
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class name without the leading `:`,
    /// e.g. `first-child` or `lang(en)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            PseudoClass::FirstChild => "first-child",
            PseudoClass::Link => "link",
            PseudoClass::Visited => "visited",
            PseudoClass::Hover => "hover",
            PseudoClass::Active => "active",
            PseudoClass::Focus => "focus",
            // The only variant that carries an argument.
            PseudoClass::Lang(lang) => return write!(f, "lang({})", lang),
        };

        f.write_str(name)
    }
}
72 | |
73 | /// A trait to query an element node metadata. |
74 | pub trait Element: Sized { |
75 | /// Returns a parent element. |
76 | fn parent_element(&self) -> Option<Self>; |
77 | |
78 | /// Returns a previous sibling element. |
79 | fn prev_sibling_element(&self) -> Option<Self>; |
80 | |
81 | /// Checks that the element has a specified local name. |
82 | fn has_local_name(&self, name: &str) -> bool; |
83 | |
84 | /// Checks that the element has a specified attribute. |
85 | fn attribute_matches(&self, local_name: &str, operator: AttributeOperator<'_>) -> bool; |
86 | |
87 | /// Checks that the element matches a specified pseudo-class. |
88 | fn pseudo_class_matches(&self, class: PseudoClass<'_>) -> bool; |
89 | } |
90 | |
/// The leading part of a compound selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SimpleSelectorType<'a> {
    /// A type selector such as `div`; holds the element name.
    Type(&'a str),
    /// `*`, also used when a compound selector starts directly with a
    /// subselector (e.g. `.class`).
    Universal,
}
96 | |
97 | #[derive (Clone, Copy, PartialEq, Debug)] |
98 | enum SubSelector<'a> { |
99 | Attribute(&'a str, AttributeOperator<'a>), |
100 | PseudoClass(PseudoClass<'a>), |
101 | } |
102 | |
103 | #[derive (Clone, Debug)] |
104 | struct SimpleSelector<'a> { |
105 | kind: SimpleSelectorType<'a>, |
106 | subselectors: Vec<SubSelector<'a>>, |
107 | } |
108 | |
/// How a compound selector relates to the one written before it.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Combinator {
    /// No combinator; used by the first compound selector.
    None,
    /// `a b`
    Descendant,
    /// `a > b`
    Child,
    /// `a + b`
    AdjacentSibling,
}
116 | |
117 | #[derive (Clone, Debug)] |
118 | struct Component<'a> { |
119 | /// A combinator that precede the selector. |
120 | combinator: Combinator, |
121 | selector: SimpleSelector<'a>, |
122 | } |
123 | |
124 | /// A selector. |
125 | #[derive (Clone, Debug)] |
126 | pub struct Selector<'a> { |
127 | components: Vec<Component<'a>>, |
128 | } |
129 | |
130 | impl<'a> Selector<'a> { |
131 | /// Parses a selector from a string. |
132 | /// |
133 | /// Will log any errors as a warnings. |
134 | /// |
135 | /// Parsing will be stopped at EOF, `,` or `{`. |
136 | pub fn parse(text: &'a str) -> Option<Self> { |
137 | parse(text).0 |
138 | } |
139 | |
140 | /// Compute the selector's specificity. |
141 | /// |
142 | /// Cf. <https://www.w3.org/TR/selectors/#specificity>. |
143 | pub fn specificity(&self) -> [u8; 3] { |
144 | let mut spec = [0u8; 3]; |
145 | |
146 | for selector in self.components.iter().map(|c| &c.selector) { |
147 | if matches!(selector.kind, SimpleSelectorType::Type(_)) { |
148 | spec[2] = spec[2].saturating_add(1); |
149 | } |
150 | |
151 | for sub in &selector.subselectors { |
152 | match sub { |
153 | SubSelector::Attribute("id" , _) => spec[0] = spec[0].saturating_add(1), |
154 | _ => spec[1] = spec[1].saturating_add(1), |
155 | } |
156 | } |
157 | } |
158 | |
159 | spec |
160 | } |
161 | |
162 | /// Checks that the provided element matches the current selector. |
163 | pub fn matches<E: Element>(&self, element: &E) -> bool { |
164 | assert!(!self.components.is_empty(), "selector must not be empty" ); |
165 | assert_eq!( |
166 | self.components[0].combinator, |
167 | Combinator::None, |
168 | "the first component must not have a combinator" |
169 | ); |
170 | |
171 | self.matches_impl(self.components.len() - 1, element) |
172 | } |
173 | |
174 | fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool { |
175 | let component = &self.components[idx]; |
176 | |
177 | if !match_selector(&component.selector, element) { |
178 | return false; |
179 | } |
180 | |
181 | match component.combinator { |
182 | Combinator::Descendant => { |
183 | let mut parent = element.parent_element(); |
184 | while let Some(e) = parent { |
185 | if self.matches_impl(idx - 1, &e) { |
186 | return true; |
187 | } |
188 | |
189 | parent = e.parent_element(); |
190 | } |
191 | |
192 | false |
193 | } |
194 | Combinator::Child => { |
195 | if let Some(parent) = element.parent_element() { |
196 | if self.matches_impl(idx - 1, &parent) { |
197 | return true; |
198 | } |
199 | } |
200 | |
201 | false |
202 | } |
203 | Combinator::AdjacentSibling => { |
204 | if let Some(prev) = element.prev_sibling_element() { |
205 | if self.matches_impl(idx - 1, &prev) { |
206 | return true; |
207 | } |
208 | } |
209 | |
210 | false |
211 | } |
212 | Combinator::None => true, |
213 | } |
214 | } |
215 | } |
216 | |
217 | fn match_selector<E: Element>(selector: &SimpleSelector<'_>, element: &E) -> bool { |
218 | if let SimpleSelectorType::Type(ident: &str) = selector.kind { |
219 | if !element.has_local_name(ident) { |
220 | return false; |
221 | } |
222 | } |
223 | |
224 | for sub: &SubSelector<'_> in &selector.subselectors { |
225 | match sub { |
226 | SubSelector::Attribute(name: &&str, operator: &AttributeOperator<'_>) => { |
227 | if !element.attribute_matches(name, *operator) { |
228 | return false; |
229 | } |
230 | } |
231 | SubSelector::PseudoClass(class: &PseudoClass<'_>) => { |
232 | if !element.pseudo_class_matches(*class) { |
233 | return false; |
234 | } |
235 | } |
236 | } |
237 | } |
238 | |
239 | true |
240 | } |
241 | |
242 | pub(crate) fn parse(text: &str) -> (Option<Selector<'_>>, usize) { |
243 | let mut components: Vec<Component<'_>> = Vec::new(); |
244 | let mut combinator = Combinator::None; |
245 | |
246 | let mut tokenizer = SelectorTokenizer::from(text); |
247 | for token in &mut tokenizer { |
248 | let mut add_sub = |sub| { |
249 | if combinator == Combinator::None && !components.is_empty() { |
250 | if let Some(ref mut component) = components.last_mut() { |
251 | component.selector.subselectors.push(sub); |
252 | } |
253 | } else { |
254 | components.push(Component { |
255 | selector: SimpleSelector { |
256 | kind: SimpleSelectorType::Universal, |
257 | subselectors: vec![sub], |
258 | }, |
259 | combinator, |
260 | }); |
261 | |
262 | combinator = Combinator::None; |
263 | } |
264 | }; |
265 | |
266 | let token = match token { |
267 | Ok(t) => t, |
268 | Err(e) => { |
269 | warn!("Selector parsing failed cause {}." , e); |
270 | return (None, tokenizer.stream.pos()); |
271 | } |
272 | }; |
273 | |
274 | match token { |
275 | SelectorToken::UniversalSelector => { |
276 | components.push(Component { |
277 | selector: SimpleSelector { |
278 | kind: SimpleSelectorType::Universal, |
279 | subselectors: Vec::new(), |
280 | }, |
281 | combinator, |
282 | }); |
283 | |
284 | combinator = Combinator::None; |
285 | } |
286 | SelectorToken::TypeSelector(ident) => { |
287 | components.push(Component { |
288 | selector: SimpleSelector { |
289 | kind: SimpleSelectorType::Type(ident), |
290 | subselectors: Vec::new(), |
291 | }, |
292 | combinator, |
293 | }); |
294 | |
295 | combinator = Combinator::None; |
296 | } |
297 | SelectorToken::ClassSelector(ident) => { |
298 | add_sub(SubSelector::Attribute( |
299 | "class" , |
300 | AttributeOperator::Contains(ident), |
301 | )); |
302 | } |
303 | SelectorToken::IdSelector(id) => { |
304 | add_sub(SubSelector::Attribute("id" , AttributeOperator::Matches(id))); |
305 | } |
306 | SelectorToken::AttributeSelector(name, op) => { |
307 | add_sub(SubSelector::Attribute(name, op)); |
308 | } |
309 | SelectorToken::PseudoClass(ident) => { |
310 | let class = match ident { |
311 | "first-child" => PseudoClass::FirstChild, |
312 | "link" => PseudoClass::Link, |
313 | "visited" => PseudoClass::Visited, |
314 | "hover" => PseudoClass::Hover, |
315 | "active" => PseudoClass::Active, |
316 | "focus" => PseudoClass::Focus, |
317 | _ => { |
318 | warn!("': {}' is not supported. Selector skipped." , ident); |
319 | return (None, tokenizer.stream.pos()); |
320 | } |
321 | }; |
322 | |
323 | // TODO: duplicates |
324 | // TODO: order |
325 | |
326 | add_sub(SubSelector::PseudoClass(class)); |
327 | } |
328 | SelectorToken::LangPseudoClass(lang) => { |
329 | add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang))); |
330 | } |
331 | SelectorToken::DescendantCombinator => { |
332 | combinator = Combinator::Descendant; |
333 | } |
334 | SelectorToken::ChildCombinator => { |
335 | combinator = Combinator::Child; |
336 | } |
337 | SelectorToken::AdjacentCombinator => { |
338 | combinator = Combinator::AdjacentSibling; |
339 | } |
340 | } |
341 | } |
342 | |
343 | if components.is_empty() { |
344 | (None, tokenizer.stream.pos()) |
345 | } else if components[0].combinator != Combinator::None { |
346 | debug_assert_eq!( |
347 | components[0].combinator, |
348 | Combinator::None, |
349 | "the first component must not have a combinator" |
350 | ); |
351 | |
352 | (None, tokenizer.stream.pos()) |
353 | } else { |
354 | (Some(Selector { components }), tokenizer.stream.pos()) |
355 | } |
356 | } |
357 | |
358 | impl fmt::Display for Selector<'_> { |
359 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
360 | for component in &self.components { |
361 | match component.combinator { |
362 | Combinator::Descendant => write!(f, " " )?, |
363 | Combinator::Child => write!(f, " > " )?, |
364 | Combinator::AdjacentSibling => write!(f, " + " )?, |
365 | Combinator::None => {} |
366 | } |
367 | |
368 | match component.selector.kind { |
369 | SimpleSelectorType::Universal => write!(f, "*" )?, |
370 | SimpleSelectorType::Type(ident) => write!(f, " {}" , ident)?, |
371 | }; |
372 | |
373 | for sel in &component.selector.subselectors { |
374 | match sel { |
375 | SubSelector::Attribute(name, operator) => { |
376 | match operator { |
377 | AttributeOperator::Exists => { |
378 | write!(f, "[ {}]" , name)?; |
379 | } |
380 | AttributeOperator::Matches(value) => { |
381 | write!(f, "[ {}=' {}']" , name, value)?; |
382 | } |
383 | AttributeOperator::Contains(value) => { |
384 | write!(f, "[ {}~=' {}']" , name, value)?; |
385 | } |
386 | AttributeOperator::StartsWith(value) => { |
387 | write!(f, "[ {}|=' {}']" , name, value)?; |
388 | } |
389 | }; |
390 | } |
391 | SubSelector::PseudoClass(class) => write!(f, ": {}" , class)?, |
392 | } |
393 | } |
394 | } |
395 | |
396 | Ok(()) |
397 | } |
398 | } |
399 | |
400 | /// A selector token. |
401 | #[derive (Clone, Copy, PartialEq, Debug)] |
402 | pub enum SelectorToken<'a> { |
403 | /// `*` |
404 | UniversalSelector, |
405 | |
406 | /// `div` |
407 | TypeSelector(&'a str), |
408 | |
409 | /// `.class` |
410 | ClassSelector(&'a str), |
411 | |
412 | /// `#id` |
413 | IdSelector(&'a str), |
414 | |
415 | /// `[color=red]` |
416 | AttributeSelector(&'a str, AttributeOperator<'a>), |
417 | |
418 | /// `:first-child` |
419 | PseudoClass(&'a str), |
420 | |
421 | /// `:lang(en)` |
422 | LangPseudoClass(&'a str), |
423 | |
424 | /// `a b` |
425 | DescendantCombinator, |
426 | |
427 | /// `a > b` |
428 | ChildCombinator, |
429 | |
430 | /// `a + b` |
431 | AdjacentCombinator, |
432 | } |
433 | |
434 | /// A selector tokenizer. |
435 | /// |
436 | /// # Example |
437 | /// |
438 | /// ``` |
439 | /// use simplecss::{SelectorTokenizer, SelectorToken}; |
440 | /// |
441 | /// let mut t = SelectorTokenizer::from("div > p:first-child" ); |
442 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("div" )); |
443 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::ChildCombinator); |
444 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("p" )); |
445 | /// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::PseudoClass("first-child" )); |
446 | /// assert!(t.next().is_none()); |
447 | /// ``` |
448 | pub struct SelectorTokenizer<'a> { |
449 | stream: Stream<'a>, |
450 | after_combinator: bool, |
451 | finished: bool, |
452 | } |
453 | |
454 | impl<'a> From<&'a str> for SelectorTokenizer<'a> { |
455 | fn from(text: &'a str) -> Self { |
456 | SelectorTokenizer { |
457 | stream: Stream::from(text), |
458 | after_combinator: true, |
459 | finished: false, |
460 | } |
461 | } |
462 | } |
463 | |
464 | impl<'a> Iterator for SelectorTokenizer<'a> { |
465 | type Item = Result<SelectorToken<'a>, Error>; |
466 | |
467 | fn next(&mut self) -> Option<Self::Item> { |
468 | if self.finished || self.stream.at_end() { |
469 | if self.after_combinator { |
470 | self.after_combinator = false; |
471 | return Some(Err(Error::SelectorMissing)); |
472 | } |
473 | |
474 | return None; |
475 | } |
476 | |
477 | macro_rules! try2 { |
478 | ($e:expr) => { |
479 | match $e { |
480 | Ok(v) => v, |
481 | Err(e) => { |
482 | self.finished = true; |
483 | return Some(Err(e)); |
484 | } |
485 | } |
486 | }; |
487 | } |
488 | |
489 | match self.stream.curr_byte_unchecked() { |
490 | b'*' => { |
491 | if !self.after_combinator { |
492 | self.finished = true; |
493 | return Some(Err(Error::UnexpectedSelector)); |
494 | } |
495 | |
496 | self.after_combinator = false; |
497 | self.stream.advance(1); |
498 | Some(Ok(SelectorToken::UniversalSelector)) |
499 | } |
500 | b'#' => { |
501 | self.after_combinator = false; |
502 | self.stream.advance(1); |
503 | let ident = try2!(self.stream.consume_ident()); |
504 | Some(Ok(SelectorToken::IdSelector(ident))) |
505 | } |
506 | b'.' => { |
507 | self.after_combinator = false; |
508 | self.stream.advance(1); |
509 | let ident = try2!(self.stream.consume_ident()); |
510 | Some(Ok(SelectorToken::ClassSelector(ident))) |
511 | } |
512 | b'[' => { |
513 | self.after_combinator = false; |
514 | self.stream.advance(1); |
515 | let ident = try2!(self.stream.consume_ident()); |
516 | |
517 | let op = match try2!(self.stream.curr_byte()) { |
518 | b']' => AttributeOperator::Exists, |
519 | b'=' => { |
520 | self.stream.advance(1); |
521 | let value = try2!(self.stream.consume_string()); |
522 | AttributeOperator::Matches(value) |
523 | } |
524 | b'~' => { |
525 | self.stream.advance(1); |
526 | try2!(self.stream.consume_byte(b'=' )); |
527 | let value = try2!(self.stream.consume_string()); |
528 | AttributeOperator::Contains(value) |
529 | } |
530 | b'|' => { |
531 | self.stream.advance(1); |
532 | try2!(self.stream.consume_byte(b'=' )); |
533 | let value = try2!(self.stream.consume_string()); |
534 | AttributeOperator::StartsWith(value) |
535 | } |
536 | _ => { |
537 | self.finished = true; |
538 | return Some(Err(Error::InvalidAttributeSelector)); |
539 | } |
540 | }; |
541 | |
542 | try2!(self.stream.consume_byte(b']' )); |
543 | |
544 | Some(Ok(SelectorToken::AttributeSelector(ident, op))) |
545 | } |
546 | b':' => { |
547 | self.after_combinator = false; |
548 | self.stream.advance(1); |
549 | let ident = try2!(self.stream.consume_ident()); |
550 | |
551 | if ident == "lang" { |
552 | try2!(self.stream.consume_byte(b'(' )); |
553 | let lang = self.stream.consume_bytes(|c| c != b')' ).trim(); |
554 | try2!(self.stream.consume_byte(b')' )); |
555 | |
556 | if lang.is_empty() { |
557 | self.finished = true; |
558 | return Some(Err(Error::InvalidLanguagePseudoClass)); |
559 | } |
560 | |
561 | Some(Ok(SelectorToken::LangPseudoClass(lang))) |
562 | } else { |
563 | Some(Ok(SelectorToken::PseudoClass(ident))) |
564 | } |
565 | } |
566 | b'>' => { |
567 | if self.after_combinator { |
568 | self.after_combinator = false; |
569 | self.finished = true; |
570 | return Some(Err(Error::UnexpectedCombinator)); |
571 | } |
572 | |
573 | self.stream.advance(1); |
574 | self.after_combinator = true; |
575 | Some(Ok(SelectorToken::ChildCombinator)) |
576 | } |
577 | b'+' => { |
578 | if self.after_combinator { |
579 | self.after_combinator = false; |
580 | self.finished = true; |
581 | return Some(Err(Error::UnexpectedCombinator)); |
582 | } |
583 | |
584 | self.stream.advance(1); |
585 | self.after_combinator = true; |
586 | Some(Ok(SelectorToken::AdjacentCombinator)) |
587 | } |
588 | b' ' | b' \t' | b' \n' | b' \r' | b' \x0C' => { |
589 | self.stream.skip_spaces(); |
590 | |
591 | if self.after_combinator { |
592 | return self.next(); |
593 | } |
594 | |
595 | while self.stream.curr_byte() == Ok(b'/' ) { |
596 | try2!(self.stream.skip_comment()); |
597 | self.stream.skip_spaces(); |
598 | } |
599 | |
600 | match self.stream.curr_byte() { |
601 | Ok(b'>' ) | Ok(b'+' ) | Ok(b',' ) | Ok(b'{' ) | Err(_) => self.next(), |
602 | _ => { |
603 | if self.after_combinator { |
604 | self.after_combinator = false; |
605 | self.finished = true; |
606 | return Some(Err(Error::UnexpectedSelector)); |
607 | } |
608 | |
609 | self.after_combinator = true; |
610 | Some(Ok(SelectorToken::DescendantCombinator)) |
611 | } |
612 | } |
613 | } |
614 | b'/' => { |
615 | if self.stream.next_byte() == Ok(b'*' ) { |
616 | try2!(self.stream.skip_comment()); |
617 | } else { |
618 | self.finished = true; |
619 | } |
620 | |
621 | self.next() |
622 | } |
623 | b',' | b'{' => { |
624 | self.finished = true; |
625 | self.next() |
626 | } |
627 | _ => { |
628 | let ident = try2!(self.stream.consume_ident()); |
629 | |
630 | if !self.after_combinator { |
631 | self.finished = true; |
632 | return Some(Err(Error::UnexpectedSelector)); |
633 | } |
634 | |
635 | self.after_combinator = false; |
636 | Some(Ok(SelectorToken::TypeSelector(ident))) |
637 | } |
638 | } |
639 | } |
640 | } |
641 | |