| 1 | // Copyright 2016 the SimpleCSS Authors |
| 2 | // SPDX-License-Identifier: Apache-2.0 OR MIT |
| 3 | |
| 4 | /*! |
| 5 | A simple [CSS 2.1](https://www.w3.org/TR/CSS21/) parser and selector. |
| 6 | |
| 7 | This is not a browser-grade CSS parser. If you need one, |
| 8 | use [cssparser](https://crates.io/crates/cssparser) + |
| 9 | [selectors](https://crates.io/crates/selectors). |
| 10 | |
| 11 | Since it's very simple we will start with limitations: |
| 12 | |
| 13 | ## Limitations |
| 14 | |
| 15 | - [At-rules](https://www.w3.org/TR/CSS21/syndata.html#at-rules) are not supported. |
| 16 | They will be skipped during parsing. |
| 17 | - Property values are not parsed. |
| 18 | In CSS like `* { width: 5px }` you will get a `width` property with a `5px` value as a string. |
| 19 | - CDO/CDC comments are not supported. |
| 20 | - Parser is case sensitive. All keywords must be lowercase. |
| 21 | - Unicode escape, like `\26`, is not supported. |
| 22 | |
| 23 | ## Features |
| 24 | |
| 25 | - Selector matching support. |
| 26 | - The rules are sorted by specificity. |
| 27 | - `!important` parsing support. |
| 28 | - Has high-level parsers and low-level, zero-allocation tokenizers. |
| 29 | - No unsafe. |
| 30 | */ |
| 31 | |
| 32 | // LINEBENDER LINT SET - lib.rs - v2 |
| 33 | // See https://linebender.org/wiki/canonical-lints/ |
| 34 | // These lints aren't included in Cargo.toml because they |
| 35 | // shouldn't apply to examples and tests |
| 36 | #![warn (unused_crate_dependencies)] |
| 37 | #![warn (clippy::print_stdout, clippy::print_stderr)] |
| 38 | // Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit. |
| 39 | #![cfg_attr (target_pointer_width = "64" , warn(clippy::trivially_copy_pass_by_ref))] |
| 40 | // END LINEBENDER LINT SET |
| 41 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
| 42 | #![cfg_attr (all(not(feature = "std" ), not(test)), no_std)] |
| 43 | // The following lints are part of the Linebender standard set, |
| 44 | // but resolving them has been deferred for now. |
| 45 | // Feel free to send a PR that solves one or more of these. |
| 46 | #![allow ( |
| 47 | missing_debug_implementations, |
| 48 | unreachable_pub, |
| 49 | clippy::use_self, |
| 50 | clippy::missing_assert_message, |
| 51 | clippy::missing_panics_doc, |
| 52 | clippy::exhaustive_enums, |
| 53 | clippy::unseparated_literal_suffix |
| 54 | )] |
| 55 | #![cfg_attr (test, allow(unused_crate_dependencies))] // Some dev dependencies are only used in tests |
| 56 | |
| 57 | extern crate alloc; |
| 58 | |
| 59 | use alloc::vec::Vec; |
| 60 | use core::fmt; |
| 61 | |
| 62 | use log::warn; |
| 63 | |
| 64 | mod selector; |
| 65 | mod stream; |
| 66 | |
| 67 | pub use selector::*; |
| 68 | use stream::Stream; |
| 69 | |
| 70 | /// A list of possible errors. |
| 71 | #[derive (Clone, Copy, PartialEq, Debug)] |
| 72 | pub enum Error { |
| 73 | /// The steam ended earlier than we expected. |
| 74 | /// |
| 75 | /// Should only appear on invalid input data. |
| 76 | UnexpectedEndOfStream, |
| 77 | |
| 78 | /// An invalid ident. |
| 79 | InvalidIdent(TextPos), |
| 80 | |
| 81 | /// An unclosed comment. |
| 82 | InvalidComment(TextPos), |
| 83 | |
| 84 | /// An invalid declaration value. |
| 85 | InvalidValue(TextPos), |
| 86 | |
| 87 | /// An invalid byte. |
| 88 | #[allow (missing_docs)] |
| 89 | InvalidByte { |
| 90 | expected: u8, |
| 91 | actual: u8, |
| 92 | pos: TextPos, |
| 93 | }, |
| 94 | |
| 95 | /// A missing selector. |
| 96 | SelectorMissing, |
| 97 | |
| 98 | /// An unexpected selector. |
| 99 | UnexpectedSelector, |
| 100 | |
| 101 | /// An unexpected combinator. |
| 102 | UnexpectedCombinator, |
| 103 | |
| 104 | /// An invalid or unsupported attribute selector. |
| 105 | InvalidAttributeSelector, |
| 106 | |
| 107 | /// An invalid language pseudo-class. |
| 108 | InvalidLanguagePseudoClass, |
| 109 | } |
| 110 | |
| 111 | impl fmt::Display for Error { |
| 112 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 113 | match *self { |
| 114 | Error::UnexpectedEndOfStream => { |
| 115 | write!(f, "unexpected end of stream" ) |
| 116 | } |
| 117 | Error::InvalidIdent(pos) => { |
| 118 | write!(f, "invalid ident at {}" , pos) |
| 119 | } |
| 120 | Error::InvalidComment(pos) => { |
| 121 | write!(f, "invalid comment at {}" , pos) |
| 122 | } |
| 123 | Error::InvalidValue(pos) => { |
| 124 | write!(f, "invalid value at {}" , pos) |
| 125 | } |
| 126 | Error::InvalidByte { |
| 127 | expected, |
| 128 | actual, |
| 129 | pos, |
| 130 | } => { |
| 131 | write!( |
| 132 | f, |
| 133 | "expected ' {}' not ' {}' at {}" , |
| 134 | expected as char, actual as char, pos |
| 135 | ) |
| 136 | } |
| 137 | Error::SelectorMissing => { |
| 138 | write!(f, "selector missing" ) |
| 139 | } |
| 140 | Error::UnexpectedSelector => { |
| 141 | write!(f, "unexpected selector" ) |
| 142 | } |
| 143 | Error::UnexpectedCombinator => { |
| 144 | write!(f, "unexpected combinator" ) |
| 145 | } |
| 146 | Error::InvalidAttributeSelector => { |
| 147 | write!(f, "invalid or unsupported attribute selector" ) |
| 148 | } |
| 149 | Error::InvalidLanguagePseudoClass => { |
| 150 | write!(f, "invalid language pseudo-class" ) |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | } |
| 155 | |
// Only available with the `std` feature; the default trait methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
| 158 | |
/// A position in text.
///
/// Position indicates a row/line and a column in the original text. Starting from 1:1.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct TextPos {
    /// The row (line) number.
    pub row: u32,
    /// The column number.
    pub col: u32,
}

impl TextPos {
    /// Constructs a new `TextPos`.
    ///
    /// Should not be invoked manually, but rather via `Stream::gen_text_pos`.
    pub fn new(row: u32, col: u32) -> TextPos {
        TextPos { row, col }
    }
}

impl fmt::Display for TextPos {
    /// Writes the row, a colon, and then the column.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, " {}: {}", self.row, self.col)
    }
}
| 183 | |
/// A declaration.
///
/// All text fields borrow from the parsed input.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Declaration<'a> {
    /// The property name.
    pub name: &'a str,
    /// The property value, kept as an unparsed string.
    pub value: &'a str,
    /// Whether the declaration was marked with `!important`.
    pub important: bool,
}
| 192 | |
| 193 | /// A rule. |
| 194 | #[derive (Clone, Debug)] |
| 195 | pub struct Rule<'a> { |
| 196 | /// A rule selector. |
| 197 | pub selector: Selector<'a>, |
| 198 | /// A rule declarations. |
| 199 | pub declarations: Vec<Declaration<'a>>, |
| 200 | } |
| 201 | |
| 202 | /// A style sheet. |
| 203 | #[derive (Clone, Debug)] |
| 204 | pub struct StyleSheet<'a> { |
| 205 | /// A list of rules. |
| 206 | pub rules: Vec<Rule<'a>>, |
| 207 | } |
| 208 | |
| 209 | impl<'a> StyleSheet<'a> { |
| 210 | /// Creates an empty style sheet. |
| 211 | pub fn new() -> Self { |
| 212 | StyleSheet { rules: Vec::new() } |
| 213 | } |
| 214 | |
| 215 | /// Parses a style sheet from text. |
| 216 | /// |
| 217 | /// At-rules are not supported and will be skipped. |
| 218 | /// |
| 219 | /// # Errors |
| 220 | /// |
| 221 | /// Doesn't produce any errors. In worst case scenario will return an empty stylesheet. |
| 222 | /// |
| 223 | /// All warnings will be logged. |
| 224 | pub fn parse(text: &'a str) -> Self { |
| 225 | let mut sheet = StyleSheet::new(); |
| 226 | sheet.parse_more(text); |
| 227 | sheet |
| 228 | } |
| 229 | |
| 230 | /// Parses a style sheet from a text to the current style sheet. |
| 231 | pub fn parse_more(&mut self, text: &'a str) { |
| 232 | let mut s = Stream::from(text); |
| 233 | |
| 234 | if s.skip_spaces_and_comments().is_err() { |
| 235 | return; |
| 236 | } |
| 237 | |
| 238 | while !s.at_end() { |
| 239 | if s.skip_spaces_and_comments().is_err() { |
| 240 | break; |
| 241 | } |
| 242 | |
| 243 | let _ = consume_statement(&mut s, &mut self.rules); |
| 244 | } |
| 245 | |
| 246 | if !s.at_end() { |
| 247 | warn!(" {} bytes were left." , s.slice_tail().len()); |
| 248 | } |
| 249 | |
| 250 | // Remove empty rules. |
| 251 | self.rules.retain(|rule| !rule.declarations.is_empty()); |
| 252 | |
| 253 | // Sort the rules by specificity. |
| 254 | self.rules |
| 255 | .sort_by_cached_key(|rule| rule.selector.specificity()); |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | impl fmt::Display for StyleSheet<'_> { |
| 260 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 261 | for (i: usize, rule: &Rule<'_>) in self.rules.iter().enumerate() { |
| 262 | write!(f, " {} {{ " , rule.selector)?; |
| 263 | for dec: &Declaration<'_> in &rule.declarations { |
| 264 | write!(f, " {}: {}" , dec.name, dec.value)?; |
| 265 | if dec.important { |
| 266 | write!(f, " !important" )?; |
| 267 | } |
| 268 | write!(f, ";" )?; |
| 269 | } |
| 270 | write!(f, " }}" )?; |
| 271 | |
| 272 | if i != self.rules.len() - 1 { |
| 273 | writeln!(f)?; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | Ok(()) |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | impl Default for StyleSheet<'_> { |
| 282 | fn default() -> Self { |
| 283 | Self::new() |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | fn consume_statement<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
| 288 | if s.curr_byte() == Ok(b'@' ) { |
| 289 | s.advance(1); |
| 290 | consume_at_rule(s) |
| 291 | } else { |
| 292 | consume_rule_set(s, rules) |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | fn consume_at_rule(s: &mut Stream<'_>) -> Result<(), Error> { |
| 297 | let ident: &str = s.consume_ident()?; |
| 298 | warn!("The @ {} rule is not supported. Skipped." , ident); |
| 299 | |
| 300 | s.skip_bytes(|c: u8| c != b';' && c != b'{' ); |
| 301 | |
| 302 | match s.curr_byte()? { |
| 303 | b';' => s.advance(1), |
| 304 | b'{' => consume_block(s), |
| 305 | _ => {} |
| 306 | } |
| 307 | |
| 308 | Ok(()) |
| 309 | } |
| 310 | |
| 311 | fn consume_rule_set<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
| 312 | let start_rule_idx = rules.len(); |
| 313 | |
| 314 | while s.curr_byte()? == b',' || start_rule_idx == rules.len() { |
| 315 | if s.curr_byte()? == b',' { |
| 316 | s.advance(1); |
| 317 | } |
| 318 | |
| 319 | let (selector, offset) = parse(s.slice_tail()); |
| 320 | s.advance(offset); |
| 321 | s.skip_spaces(); |
| 322 | |
| 323 | if let Some(selector) = selector { |
| 324 | rules.push(Rule { |
| 325 | selector, |
| 326 | declarations: Vec::new(), |
| 327 | }); |
| 328 | } |
| 329 | |
| 330 | match s.curr_byte()? { |
| 331 | b'{' => break, |
| 332 | b',' => {} |
| 333 | _ => { |
| 334 | s.skip_bytes(|c| c != b'{' ); |
| 335 | break; |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | s.try_consume_byte(b'{' ); |
| 341 | |
| 342 | let declarations = consume_declarations(s)?; |
| 343 | for rule in rules.iter_mut().skip(start_rule_idx) { |
| 344 | rule.declarations = declarations.clone(); |
| 345 | } |
| 346 | |
| 347 | s.try_consume_byte(b'}' ); |
| 348 | |
| 349 | Ok(()) |
| 350 | } |
| 351 | |
| 352 | fn consume_block(s: &mut Stream<'_>) { |
| 353 | s.try_consume_byte(b'{' ); |
| 354 | consume_until_block_end(s); |
| 355 | } |
| 356 | |
| 357 | fn consume_until_block_end(s: &mut Stream<'_>) { |
| 358 | // Block can have nested blocks, so we have to check for matching braces. |
| 359 | // We simply counting the number of opening braces, which is incorrect, |
| 360 | // since `{` can be inside a string, but it's fine for majority of the cases. |
| 361 | |
| 362 | let mut braces = 0; |
| 363 | while !s.at_end() { |
| 364 | match s.curr_byte_unchecked() { |
| 365 | b'{' => { |
| 366 | braces += 1; |
| 367 | } |
| 368 | b'}' => { |
| 369 | if braces == 0 { |
| 370 | break; |
| 371 | } else { |
| 372 | braces -= 1; |
| 373 | } |
| 374 | } |
| 375 | _ => {} |
| 376 | } |
| 377 | |
| 378 | s.advance(1); |
| 379 | } |
| 380 | |
| 381 | s.try_consume_byte(b'}' ); |
| 382 | } |
| 383 | |
| 384 | fn consume_declarations<'a>(s: &mut Stream<'a>) -> Result<Vec<Declaration<'a>>, Error> { |
| 385 | let mut declarations: Vec> = Vec::new(); |
| 386 | |
| 387 | while !s.at_end() && s.curr_byte() != Ok(b'}' ) { |
| 388 | match consume_declaration(s) { |
| 389 | Ok(declaration: Declaration<'_>) => declarations.push(declaration), |
| 390 | Err(_) => { |
| 391 | consume_until_block_end(s); |
| 392 | break; |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | Ok(declarations) |
| 398 | } |
| 399 | |
| 400 | /// A declaration tokenizer. |
| 401 | /// |
| 402 | /// Tokenizer will stop at the first invalid token. |
| 403 | /// |
| 404 | /// # Example |
| 405 | /// |
| 406 | /// ``` |
| 407 | /// use simplecss::{DeclarationTokenizer, Declaration}; |
| 408 | /// |
| 409 | /// let mut t = DeclarationTokenizer::from("background: url( \"img.png \"); color:red !important" ); |
| 410 | /// assert_eq!(t.next().unwrap(), Declaration { name: "background" , value: "url( \"img.png \")" , important: false }); |
| 411 | /// assert_eq!(t.next().unwrap(), Declaration { name: "color" , value: "red" , important: true }); |
| 412 | /// ``` |
| 413 | pub struct DeclarationTokenizer<'a> { |
| 414 | stream: Stream<'a>, |
| 415 | } |
| 416 | |
| 417 | impl<'a> From<&'a str> for DeclarationTokenizer<'a> { |
| 418 | fn from(text: &'a str) -> Self { |
| 419 | DeclarationTokenizer { |
| 420 | stream: Stream::from(text), |
| 421 | } |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | impl<'a> Iterator for DeclarationTokenizer<'a> { |
| 426 | type Item = Declaration<'a>; |
| 427 | |
| 428 | fn next(&mut self) -> Option<Self::Item> { |
| 429 | let _ = self.stream.skip_spaces_and_comments(); |
| 430 | |
| 431 | if self.stream.at_end() { |
| 432 | return None; |
| 433 | } |
| 434 | |
| 435 | match consume_declaration(&mut self.stream) { |
| 436 | Ok(v: Declaration<'_>) => Some(v), |
| 437 | Err(_) => { |
| 438 | self.stream.jump_to_end(); |
| 439 | None |
| 440 | } |
| 441 | } |
| 442 | } |
| 443 | } |
| 444 | |
| 445 | fn consume_declaration<'a>(s: &mut Stream<'a>) -> Result<Declaration<'a>, Error> { |
| 446 | s.skip_spaces_and_comments()?; |
| 447 | |
| 448 | // Parse name. |
| 449 | |
| 450 | // https://snook.ca/archives/html_and_css/targetting_ie7 |
| 451 | if s.curr_byte() == Ok(b'*' ) { |
| 452 | s.advance(1); |
| 453 | } |
| 454 | |
| 455 | let name = s.consume_ident()?; |
| 456 | |
| 457 | s.skip_spaces_and_comments()?; |
| 458 | s.consume_byte(b':' )?; |
| 459 | s.skip_spaces_and_comments()?; |
| 460 | |
| 461 | // Parse value. |
| 462 | let start = s.pos(); |
| 463 | let mut end = s.pos(); |
| 464 | while consume_term(s).is_ok() { |
| 465 | end = s.pos(); |
| 466 | s.skip_spaces_and_comments()?; |
| 467 | } |
| 468 | let value = s.slice_range(start, end).trim(); |
| 469 | |
| 470 | s.skip_spaces_and_comments()?; |
| 471 | |
| 472 | // Check for `important`. |
| 473 | let mut important = false; |
| 474 | if s.curr_byte() == Ok(b'!' ) { |
| 475 | s.advance(1); |
| 476 | s.skip_spaces_and_comments()?; |
| 477 | if s.slice_tail().starts_with("important" ) { |
| 478 | s.advance(9); |
| 479 | important = true; |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | s.skip_spaces_and_comments()?; |
| 484 | |
| 485 | while s.curr_byte() == Ok(b';' ) { |
| 486 | s.advance(1); |
| 487 | s.skip_spaces_and_comments()?; |
| 488 | } |
| 489 | |
| 490 | s.skip_spaces_and_comments()?; |
| 491 | |
| 492 | if value.is_empty() { |
| 493 | return Err(Error::InvalidValue(s.gen_text_pos_from(start))); |
| 494 | } |
| 495 | |
| 496 | Ok(Declaration { |
| 497 | name, |
| 498 | value, |
| 499 | important, |
| 500 | }) |
| 501 | } |
| 502 | |
| 503 | fn consume_term(s: &mut Stream<'_>) -> Result<(), Error> { |
| 504 | fn consume_digits(s: &mut Stream<'_>) { |
| 505 | while let Ok(b'0' ..=b'9' ) = s.curr_byte() { |
| 506 | s.advance(1); |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | match s.curr_byte()? { |
| 511 | b'#' => { |
| 512 | s.advance(1); |
| 513 | match s.consume_ident() { |
| 514 | Ok(_) => {} |
| 515 | Err(_) => { |
| 516 | // Try consume as a hex color. |
| 517 | while let Ok(c) = s.curr_byte() { |
| 518 | match c { |
| 519 | b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' => s.advance(1), |
| 520 | _ => break, |
| 521 | } |
| 522 | } |
| 523 | } |
| 524 | } |
| 525 | } |
| 526 | b'+' | b'-' | b'0' ..=b'9' | b'.' => { |
| 527 | // Consume number. |
| 528 | |
| 529 | s.advance(1); |
| 530 | consume_digits(s); |
| 531 | if s.curr_byte() == Ok(b'.' ) { |
| 532 | s.advance(1); |
| 533 | consume_digits(s); |
| 534 | } |
| 535 | |
| 536 | if s.curr_byte() == Ok(b'%' ) { |
| 537 | s.advance(1); |
| 538 | } else { |
| 539 | // Consume suffix if any. |
| 540 | let _ = s.consume_ident(); |
| 541 | } |
| 542 | } |
| 543 | b' \'' | b'"' => { |
| 544 | s.consume_string()?; |
| 545 | } |
| 546 | b',' => { |
| 547 | s.advance(1); |
| 548 | } |
| 549 | _ => { |
| 550 | let _ = s.consume_ident()?; |
| 551 | |
| 552 | // Consume function. |
| 553 | if s.curr_byte() == Ok(b'(' ) { |
| 554 | s.skip_bytes(|c| c != b')' ); |
| 555 | s.consume_byte(b')' )?; |
| 556 | } |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | Ok(()) |
| 561 | } |
| 562 | |