1 | // Copyright 2016 the SimpleCSS Authors |
2 | // SPDX-License-Identifier: Apache-2.0 OR MIT |
3 | |
4 | /*! |
5 | A simple [CSS 2.1](https://www.w3.org/TR/CSS21/) parser and selector. |
6 | |
7 | This is not a browser-grade CSS parser. If you need one, |
8 | use [cssparser](https://crates.io/crates/cssparser) + |
9 | [selectors](https://crates.io/crates/selectors). |
10 | |
11 | Since it's very simple we will start with limitations: |
12 | |
13 | ## Limitations |
14 | |
15 | - [At-rules](https://www.w3.org/TR/CSS21/syndata.html#at-rules) are not supported. |
16 | They will be skipped during parsing. |
17 | - Property values are not parsed. |
18 | In CSS like `* { width: 5px }` you will get a `width` property with a `5px` value as a string. |
19 | - CDO/CDC comments are not supported. |
20 | - Parser is case sensitive. All keywords must be lowercase. |
21 | - Unicode escape, like `\26`, is not supported. |
22 | |
23 | ## Features |
24 | |
25 | - Selector matching support. |
26 | - The rules are sorted by specificity. |
27 | - `!important` parsing support. |
- Has high-level parsers and low-level, zero-allocation tokenizers.
29 | - No unsafe. |
30 | */ |
31 | |
32 | // LINEBENDER LINT SET - lib.rs - v2 |
33 | // See https://linebender.org/wiki/canonical-lints/ |
34 | // These lints aren't included in Cargo.toml because they |
35 | // shouldn't apply to examples and tests |
36 | #![warn (unused_crate_dependencies)] |
37 | #![warn (clippy::print_stdout, clippy::print_stderr)] |
38 | // Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit. |
39 | #![cfg_attr (target_pointer_width = "64" , warn(clippy::trivially_copy_pass_by_ref))] |
40 | // END LINEBENDER LINT SET |
41 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
42 | #![cfg_attr (all(not(feature = "std" ), not(test)), no_std)] |
43 | // The following lints are part of the Linebender standard set, |
44 | // but resolving them has been deferred for now. |
45 | // Feel free to send a PR that solves one or more of these. |
46 | #![allow ( |
47 | missing_debug_implementations, |
48 | unreachable_pub, |
49 | clippy::use_self, |
50 | clippy::missing_assert_message, |
51 | clippy::missing_panics_doc, |
52 | clippy::exhaustive_enums, |
53 | clippy::unseparated_literal_suffix |
54 | )] |
55 | #![cfg_attr (test, allow(unused_crate_dependencies))] // Some dev dependencies are only used in tests |
56 | |
57 | extern crate alloc; |
58 | |
59 | use alloc::vec::Vec; |
60 | use core::fmt; |
61 | |
62 | use log::warn; |
63 | |
64 | mod selector; |
65 | mod stream; |
66 | |
67 | pub use selector::*; |
68 | use stream::Stream; |
69 | |
70 | /// A list of possible errors. |
71 | #[derive (Clone, Copy, PartialEq, Debug)] |
72 | pub enum Error { |
73 | /// The steam ended earlier than we expected. |
74 | /// |
75 | /// Should only appear on invalid input data. |
76 | UnexpectedEndOfStream, |
77 | |
78 | /// An invalid ident. |
79 | InvalidIdent(TextPos), |
80 | |
81 | /// An unclosed comment. |
82 | InvalidComment(TextPos), |
83 | |
84 | /// An invalid declaration value. |
85 | InvalidValue(TextPos), |
86 | |
87 | /// An invalid byte. |
88 | #[allow (missing_docs)] |
89 | InvalidByte { |
90 | expected: u8, |
91 | actual: u8, |
92 | pos: TextPos, |
93 | }, |
94 | |
95 | /// A missing selector. |
96 | SelectorMissing, |
97 | |
98 | /// An unexpected selector. |
99 | UnexpectedSelector, |
100 | |
101 | /// An unexpected combinator. |
102 | UnexpectedCombinator, |
103 | |
104 | /// An invalid or unsupported attribute selector. |
105 | InvalidAttributeSelector, |
106 | |
107 | /// An invalid language pseudo-class. |
108 | InvalidLanguagePseudoClass, |
109 | } |
110 | |
111 | impl fmt::Display for Error { |
112 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
113 | match *self { |
114 | Error::UnexpectedEndOfStream => { |
115 | write!(f, "unexpected end of stream" ) |
116 | } |
117 | Error::InvalidIdent(pos) => { |
118 | write!(f, "invalid ident at {}" , pos) |
119 | } |
120 | Error::InvalidComment(pos) => { |
121 | write!(f, "invalid comment at {}" , pos) |
122 | } |
123 | Error::InvalidValue(pos) => { |
124 | write!(f, "invalid value at {}" , pos) |
125 | } |
126 | Error::InvalidByte { |
127 | expected, |
128 | actual, |
129 | pos, |
130 | } => { |
131 | write!( |
132 | f, |
133 | "expected ' {}' not ' {}' at {}" , |
134 | expected as char, actual as char, pos |
135 | ) |
136 | } |
137 | Error::SelectorMissing => { |
138 | write!(f, "selector missing" ) |
139 | } |
140 | Error::UnexpectedSelector => { |
141 | write!(f, "unexpected selector" ) |
142 | } |
143 | Error::UnexpectedCombinator => { |
144 | write!(f, "unexpected combinator" ) |
145 | } |
146 | Error::InvalidAttributeSelector => { |
147 | write!(f, "invalid or unsupported attribute selector" ) |
148 | } |
149 | Error::InvalidLanguagePseudoClass => { |
150 | write!(f, "invalid language pseudo-class" ) |
151 | } |
152 | } |
153 | } |
154 | } |
155 | |
156 | #[cfg (feature = "std" )] |
157 | impl std::error::Error for Error {} |
158 | |
/// A position in text.
///
/// Position indicates a row/line and a column in the original text. Starting from 1:1.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct TextPos {
    /// The row (line) number, starting from 1.
    pub row: u32,
    /// The column number, starting from 1.
    pub col: u32,
}

impl TextPos {
    /// Constructs a new `TextPos`.
    ///
    /// Should not be invoked manually, but rather via `Stream::gen_text_pos`.
    pub fn new(row: u32, col: u32) -> TextPos {
        TextPos { row, col }
    }
}

impl fmt::Display for TextPos {
    /// Formats the position as `row:col`, e.g. `1:1`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}:{}", self.row, self.col)
    }
}
183 | |
/// A declaration.
///
/// A single `name: value` pair from a declaration block, e.g. `width: 5px`.
/// All string fields borrow from the parsed text.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Declaration<'a> {
    /// The property name.
    pub name: &'a str,
    /// The property value as written in the source, with surrounding
    /// whitespace trimmed. It is not parsed any further.
    pub value: &'a str,
    /// Whether the declaration was marked with `!important`.
    pub important: bool,
}
192 | |
/// A rule.
///
/// Pairs a single selector with the declarations of its block.
#[derive (Clone, Debug)]
pub struct Rule<'a> {
    /// The selector of the rule.
    pub selector: Selector<'a>,
    /// The declarations of the rule.
    pub declarations: Vec<Declaration<'a>>,
}
201 | |
/// A style sheet.
///
/// Holds the rules collected by [`StyleSheet::parse`] and
/// [`StyleSheet::parse_more`].
#[derive (Clone, Debug)]
pub struct StyleSheet<'a> {
    /// A list of rules.
    pub rules: Vec<Rule<'a>>,
}
208 | |
209 | impl<'a> StyleSheet<'a> { |
210 | /// Creates an empty style sheet. |
211 | pub fn new() -> Self { |
212 | StyleSheet { rules: Vec::new() } |
213 | } |
214 | |
215 | /// Parses a style sheet from text. |
216 | /// |
217 | /// At-rules are not supported and will be skipped. |
218 | /// |
219 | /// # Errors |
220 | /// |
221 | /// Doesn't produce any errors. In worst case scenario will return an empty stylesheet. |
222 | /// |
223 | /// All warnings will be logged. |
224 | pub fn parse(text: &'a str) -> Self { |
225 | let mut sheet = StyleSheet::new(); |
226 | sheet.parse_more(text); |
227 | sheet |
228 | } |
229 | |
230 | /// Parses a style sheet from a text to the current style sheet. |
231 | pub fn parse_more(&mut self, text: &'a str) { |
232 | let mut s = Stream::from(text); |
233 | |
234 | if s.skip_spaces_and_comments().is_err() { |
235 | return; |
236 | } |
237 | |
238 | while !s.at_end() { |
239 | if s.skip_spaces_and_comments().is_err() { |
240 | break; |
241 | } |
242 | |
243 | let _ = consume_statement(&mut s, &mut self.rules); |
244 | } |
245 | |
246 | if !s.at_end() { |
247 | warn!(" {} bytes were left." , s.slice_tail().len()); |
248 | } |
249 | |
250 | // Remove empty rules. |
251 | self.rules.retain(|rule| !rule.declarations.is_empty()); |
252 | |
253 | // Sort the rules by specificity. |
254 | self.rules |
255 | .sort_by_cached_key(|rule| rule.selector.specificity()); |
256 | } |
257 | } |
258 | |
259 | impl fmt::Display for StyleSheet<'_> { |
260 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
261 | for (i: usize, rule: &Rule<'_>) in self.rules.iter().enumerate() { |
262 | write!(f, " {} {{ " , rule.selector)?; |
263 | for dec: &Declaration<'_> in &rule.declarations { |
264 | write!(f, " {}: {}" , dec.name, dec.value)?; |
265 | if dec.important { |
266 | write!(f, " !important" )?; |
267 | } |
268 | write!(f, ";" )?; |
269 | } |
270 | write!(f, " }}" )?; |
271 | |
272 | if i != self.rules.len() - 1 { |
273 | writeln!(f)?; |
274 | } |
275 | } |
276 | |
277 | Ok(()) |
278 | } |
279 | } |
280 | |
281 | impl Default for StyleSheet<'_> { |
282 | fn default() -> Self { |
283 | Self::new() |
284 | } |
285 | } |
286 | |
287 | fn consume_statement<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
288 | if s.curr_byte() == Ok(b'@' ) { |
289 | s.advance(1); |
290 | consume_at_rule(s) |
291 | } else { |
292 | consume_rule_set(s, rules) |
293 | } |
294 | } |
295 | |
296 | fn consume_at_rule(s: &mut Stream<'_>) -> Result<(), Error> { |
297 | let ident: &str = s.consume_ident()?; |
298 | warn!("The @ {} rule is not supported. Skipped." , ident); |
299 | |
300 | s.skip_bytes(|c: u8| c != b';' && c != b'{' ); |
301 | |
302 | match s.curr_byte()? { |
303 | b';' => s.advance(1), |
304 | b'{' => consume_block(s), |
305 | _ => {} |
306 | } |
307 | |
308 | Ok(()) |
309 | } |
310 | |
311 | fn consume_rule_set<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
312 | let start_rule_idx = rules.len(); |
313 | |
314 | while s.curr_byte()? == b',' || start_rule_idx == rules.len() { |
315 | if s.curr_byte()? == b',' { |
316 | s.advance(1); |
317 | } |
318 | |
319 | let (selector, offset) = parse(s.slice_tail()); |
320 | s.advance(offset); |
321 | s.skip_spaces(); |
322 | |
323 | if let Some(selector) = selector { |
324 | rules.push(Rule { |
325 | selector, |
326 | declarations: Vec::new(), |
327 | }); |
328 | } |
329 | |
330 | match s.curr_byte()? { |
331 | b'{' => break, |
332 | b',' => {} |
333 | _ => { |
334 | s.skip_bytes(|c| c != b'{' ); |
335 | break; |
336 | } |
337 | } |
338 | } |
339 | |
340 | s.try_consume_byte(b'{' ); |
341 | |
342 | let declarations = consume_declarations(s)?; |
343 | for rule in rules.iter_mut().skip(start_rule_idx) { |
344 | rule.declarations = declarations.clone(); |
345 | } |
346 | |
347 | s.try_consume_byte(b'}' ); |
348 | |
349 | Ok(()) |
350 | } |
351 | |
352 | fn consume_block(s: &mut Stream<'_>) { |
353 | s.try_consume_byte(b'{' ); |
354 | consume_until_block_end(s); |
355 | } |
356 | |
357 | fn consume_until_block_end(s: &mut Stream<'_>) { |
358 | // Block can have nested blocks, so we have to check for matching braces. |
359 | // We simply counting the number of opening braces, which is incorrect, |
360 | // since `{` can be inside a string, but it's fine for majority of the cases. |
361 | |
362 | let mut braces = 0; |
363 | while !s.at_end() { |
364 | match s.curr_byte_unchecked() { |
365 | b'{' => { |
366 | braces += 1; |
367 | } |
368 | b'}' => { |
369 | if braces == 0 { |
370 | break; |
371 | } else { |
372 | braces -= 1; |
373 | } |
374 | } |
375 | _ => {} |
376 | } |
377 | |
378 | s.advance(1); |
379 | } |
380 | |
381 | s.try_consume_byte(b'}' ); |
382 | } |
383 | |
384 | fn consume_declarations<'a>(s: &mut Stream<'a>) -> Result<Vec<Declaration<'a>>, Error> { |
385 | let mut declarations: Vec> = Vec::new(); |
386 | |
387 | while !s.at_end() && s.curr_byte() != Ok(b'}' ) { |
388 | match consume_declaration(s) { |
389 | Ok(declaration: Declaration<'_>) => declarations.push(declaration), |
390 | Err(_) => { |
391 | consume_until_block_end(s); |
392 | break; |
393 | } |
394 | } |
395 | } |
396 | |
397 | Ok(declarations) |
398 | } |
399 | |
/// A declaration tokenizer.
///
/// Iterates over the declarations of a declaration list, i.e. the content
/// of a `{ ... }` block without the braces.
///
/// Tokenizer will stop at the first invalid token.
///
/// # Example
///
/// ```
/// use simplecss::{DeclarationTokenizer, Declaration};
///
/// let mut t = DeclarationTokenizer::from("background: url(\"img.png\"); color:red !important");
/// assert_eq!(t.next().unwrap(), Declaration { name: "background", value: "url(\"img.png\")", important: false });
/// assert_eq!(t.next().unwrap(), Declaration { name: "color", value: "red", important: true });
/// ```
pub struct DeclarationTokenizer<'a> {
    // The remaining, not yet tokenized input.
    stream: Stream<'a>,
}
416 | |
417 | impl<'a> From<&'a str> for DeclarationTokenizer<'a> { |
418 | fn from(text: &'a str) -> Self { |
419 | DeclarationTokenizer { |
420 | stream: Stream::from(text), |
421 | } |
422 | } |
423 | } |
424 | |
425 | impl<'a> Iterator for DeclarationTokenizer<'a> { |
426 | type Item = Declaration<'a>; |
427 | |
428 | fn next(&mut self) -> Option<Self::Item> { |
429 | let _ = self.stream.skip_spaces_and_comments(); |
430 | |
431 | if self.stream.at_end() { |
432 | return None; |
433 | } |
434 | |
435 | match consume_declaration(&mut self.stream) { |
436 | Ok(v: Declaration<'_>) => Some(v), |
437 | Err(_) => { |
438 | self.stream.jump_to_end(); |
439 | None |
440 | } |
441 | } |
442 | } |
443 | } |
444 | |
445 | fn consume_declaration<'a>(s: &mut Stream<'a>) -> Result<Declaration<'a>, Error> { |
446 | s.skip_spaces_and_comments()?; |
447 | |
448 | // Parse name. |
449 | |
450 | // https://snook.ca/archives/html_and_css/targetting_ie7 |
451 | if s.curr_byte() == Ok(b'*' ) { |
452 | s.advance(1); |
453 | } |
454 | |
455 | let name = s.consume_ident()?; |
456 | |
457 | s.skip_spaces_and_comments()?; |
458 | s.consume_byte(b':' )?; |
459 | s.skip_spaces_and_comments()?; |
460 | |
461 | // Parse value. |
462 | let start = s.pos(); |
463 | let mut end = s.pos(); |
464 | while consume_term(s).is_ok() { |
465 | end = s.pos(); |
466 | s.skip_spaces_and_comments()?; |
467 | } |
468 | let value = s.slice_range(start, end).trim(); |
469 | |
470 | s.skip_spaces_and_comments()?; |
471 | |
472 | // Check for `important`. |
473 | let mut important = false; |
474 | if s.curr_byte() == Ok(b'!' ) { |
475 | s.advance(1); |
476 | s.skip_spaces_and_comments()?; |
477 | if s.slice_tail().starts_with("important" ) { |
478 | s.advance(9); |
479 | important = true; |
480 | } |
481 | } |
482 | |
483 | s.skip_spaces_and_comments()?; |
484 | |
485 | while s.curr_byte() == Ok(b';' ) { |
486 | s.advance(1); |
487 | s.skip_spaces_and_comments()?; |
488 | } |
489 | |
490 | s.skip_spaces_and_comments()?; |
491 | |
492 | if value.is_empty() { |
493 | return Err(Error::InvalidValue(s.gen_text_pos_from(start))); |
494 | } |
495 | |
496 | Ok(Declaration { |
497 | name, |
498 | value, |
499 | important, |
500 | }) |
501 | } |
502 | |
503 | fn consume_term(s: &mut Stream<'_>) -> Result<(), Error> { |
504 | fn consume_digits(s: &mut Stream<'_>) { |
505 | while let Ok(b'0' ..=b'9' ) = s.curr_byte() { |
506 | s.advance(1); |
507 | } |
508 | } |
509 | |
510 | match s.curr_byte()? { |
511 | b'#' => { |
512 | s.advance(1); |
513 | match s.consume_ident() { |
514 | Ok(_) => {} |
515 | Err(_) => { |
516 | // Try consume as a hex color. |
517 | while let Ok(c) = s.curr_byte() { |
518 | match c { |
519 | b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' => s.advance(1), |
520 | _ => break, |
521 | } |
522 | } |
523 | } |
524 | } |
525 | } |
526 | b'+' | b'-' | b'0' ..=b'9' | b'.' => { |
527 | // Consume number. |
528 | |
529 | s.advance(1); |
530 | consume_digits(s); |
531 | if s.curr_byte() == Ok(b'.' ) { |
532 | s.advance(1); |
533 | consume_digits(s); |
534 | } |
535 | |
536 | if s.curr_byte() == Ok(b'%' ) { |
537 | s.advance(1); |
538 | } else { |
539 | // Consume suffix if any. |
540 | let _ = s.consume_ident(); |
541 | } |
542 | } |
543 | b' \'' | b'"' => { |
544 | s.consume_string()?; |
545 | } |
546 | b',' => { |
547 | s.advance(1); |
548 | } |
549 | _ => { |
550 | let _ = s.consume_ident()?; |
551 | |
552 | // Consume function. |
553 | if s.curr_byte() == Ok(b'(' ) { |
554 | s.skip_bytes(|c| c != b')' ); |
555 | s.consume_byte(b')' )?; |
556 | } |
557 | } |
558 | } |
559 | |
560 | Ok(()) |
561 | } |
562 | |