1 | /*! |
2 | A simple [CSS 2.1](https://www.w3.org/TR/CSS21/) parser and selector. |
3 | |
4 | This is not a browser-grade CSS parser. If you need one, |
5 | use [cssparser](https://crates.io/crates/cssparser) + |
6 | [selectors](https://crates.io/crates/selectors). |
7 | |
8 | Since it's very simple we will start with limitations: |
9 | |
10 | ## Limitations |
11 | |
12 | - [At-rules](https://www.w3.org/TR/CSS21/syndata.html#at-rules) are not supported. |
13 | They will be skipped during parsing. |
14 | - Property values are not parsed. |
15 | In CSS like `* { width: 5px }` you will get a `width` property with a `5px` value as a string. |
16 | - CDO/CDC comments are not supported. |
17 | - Parser is case sensitive. All keywords must be lowercase. |
18 | - Unicode escape, like `\26`, is not supported. |
19 | |
20 | ## Features |
21 | |
22 | - Selector matching support. |
23 | - The rules are sorted by specificity. |
- `!important` parsing support.
- Has high-level parsers and low-level, zero-allocation tokenizers.
26 | - No unsafe. |
27 | */ |
28 | |
29 | #![doc (html_root_url = "https://docs.rs/simplecss/0.2.1" )] |
30 | |
31 | #![forbid (unsafe_code)] |
32 | #![warn (missing_docs)] |
33 | |
34 | use std::fmt; |
35 | |
36 | use log::warn; |
37 | |
38 | mod selector; |
39 | mod stream; |
40 | |
41 | pub use selector::*; |
42 | use stream::Stream; |
43 | |
44 | |
45 | /// A list of possible errors. |
46 | #[derive (Clone, Copy, PartialEq, Debug)] |
47 | pub enum Error { |
48 | /// The steam ended earlier than we expected. |
49 | /// |
50 | /// Should only appear on invalid input data. |
51 | UnexpectedEndOfStream, |
52 | |
53 | /// An invalid ident. |
54 | InvalidIdent(TextPos), |
55 | |
56 | /// An unclosed comment. |
57 | InvalidComment(TextPos), |
58 | |
59 | /// An invalid declaration value. |
60 | InvalidValue(TextPos), |
61 | |
62 | /// An invalid byte. |
63 | #[allow (missing_docs)] |
64 | InvalidByte { expected: u8, actual: u8, pos: TextPos }, |
65 | |
66 | /// A missing selector. |
67 | SelectorMissing, |
68 | |
69 | /// An unexpected selector. |
70 | UnexpectedSelector, |
71 | |
72 | /// An unexpected combinator. |
73 | UnexpectedCombinator, |
74 | |
75 | /// An invalid or unsupported attribute selector. |
76 | InvalidAttributeSelector, |
77 | |
78 | /// An invalid language pseudo-class. |
79 | InvalidLanguagePseudoClass, |
80 | } |
81 | |
82 | impl fmt::Display for Error { |
83 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
84 | match *self { |
85 | Error::UnexpectedEndOfStream => { |
86 | write!(f, "unexpected end of stream" ) |
87 | } |
88 | Error::InvalidIdent(pos) => { |
89 | write!(f, "invalid ident at {}" , pos) |
90 | } |
91 | Error::InvalidComment(pos) => { |
92 | write!(f, "invalid comment at {}" , pos) |
93 | } |
94 | Error::InvalidValue(pos) => { |
95 | write!(f, "invalid value at {}" , pos) |
96 | } |
97 | Error::InvalidByte { expected, actual, pos } => { |
98 | write!(f, "expected ' {}' not ' {}' at {}" , |
99 | expected as char, actual as char, pos) |
100 | } |
101 | Error::SelectorMissing => { |
102 | write!(f, "selector missing" ) |
103 | } |
104 | Error::UnexpectedSelector => { |
105 | write!(f, "unexpected selector" ) |
106 | } |
107 | Error::UnexpectedCombinator => { |
108 | write!(f, "unexpected combinator" ) |
109 | } |
110 | Error::InvalidAttributeSelector => { |
111 | write!(f, "invalid or unsupported attribute selector" ) |
112 | } |
113 | Error::InvalidLanguagePseudoClass => { |
114 | write!(f, "invalid language pseudo-class" ) |
115 | } |
116 | } |
117 | } |
118 | } |
119 | |
120 | impl std::error::Error for Error {} |
121 | |
122 | |
/// A position in text.
///
/// Position indicates a row/line and a column in the original text. Starting from 1:1.
///
/// The `Display` implementation renders the position as `row:col`, e.g. `1:1`.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct TextPos {
    /// The row (line) number.
    pub row: u32,
    /// The column number.
    pub col: u32,
}

impl TextPos {
    /// Constructs a new `TextPos`.
    ///
    /// Should not be invoked manually, but rather via `Stream::gen_text_pos`.
    pub fn new(row: u32, col: u32) -> TextPos {
        TextPos { row, col }
    }
}

impl fmt::Display for TextPos {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Compact `row:col`, with no padding, so it embeds cleanly in messages
        // such as "invalid ident at 1:1".
        write!(f, "{}:{}", self.row, self.col)
    }
}
147 | |
148 | |
/// A declaration: a property name, its value and an `!important` flag.
///
/// Both strings are borrowed from the parsed text. The value is not parsed
/// any further and is kept as text.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Declaration<'a> {
    /// The property name, e.g. `width`.
    pub name: &'a str,
    /// The property value as text, e.g. `5px`.
    pub value: &'a str,
    /// Set when the declaration is marked with `!important`.
    pub important: bool,
}
157 | |
158 | /// A rule. |
159 | #[derive (Clone, Debug)] |
160 | pub struct Rule<'a> { |
161 | /// A rule selector. |
162 | pub selector: Selector<'a>, |
163 | /// A rule declarations. |
164 | pub declarations: Vec<Declaration<'a>>, |
165 | } |
166 | |
167 | /// A style sheet. |
168 | #[derive (Clone, Debug)] |
169 | pub struct StyleSheet<'a> { |
170 | /// A list of rules. |
171 | pub rules: Vec<Rule<'a>>, |
172 | } |
173 | |
174 | impl<'a> StyleSheet<'a> { |
175 | /// Creates an empty style sheet. |
176 | pub fn new() -> Self { |
177 | StyleSheet { rules: Vec::new() } |
178 | } |
179 | |
180 | /// Parses a style sheet from text. |
181 | /// |
182 | /// At-rules are not supported and will be skipped. |
183 | /// |
184 | /// # Errors |
185 | /// |
186 | /// Doesn't produce any errors. In worst case scenario will return an empty stylesheet. |
187 | /// |
188 | /// All warnings will be logged. |
189 | pub fn parse(text: &'a str) -> Self { |
190 | let mut sheet = StyleSheet::new(); |
191 | sheet.parse_more(text); |
192 | sheet |
193 | } |
194 | |
195 | /// Parses a style sheet from a text to the current style sheet. |
196 | pub fn parse_more(&mut self, text: &'a str) { |
197 | let mut s = Stream::from(text); |
198 | |
199 | if s.skip_spaces_and_comments().is_err() { |
200 | return; |
201 | } |
202 | |
203 | while !s.at_end() { |
204 | if s.skip_spaces_and_comments().is_err() { |
205 | break; |
206 | } |
207 | |
208 | let _ = consume_statement(&mut s, &mut self.rules); |
209 | } |
210 | |
211 | if !s.at_end() { |
212 | warn!(" {} bytes were left." , s.slice_tail().len()); |
213 | } |
214 | |
215 | // Remove empty rules. |
216 | self.rules.retain(|rule| !rule.declarations.is_empty()); |
217 | |
218 | // Sort the rules by specificity. |
219 | self.rules.sort_by_cached_key(|rule| rule.selector.specificity()); |
220 | } |
221 | } |
222 | |
223 | impl fmt::Display for StyleSheet<'_> { |
224 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
225 | for (i: usize, rule: &Rule<'_>) in self.rules.iter().enumerate() { |
226 | write!(f, " {} {{ " , rule.selector)?; |
227 | for dec: &Declaration<'_> in &rule.declarations { |
228 | write!(f, " {}: {}" , dec.name, dec.value)?; |
229 | if dec.important { |
230 | write!(f, " !important" )?; |
231 | } |
232 | write!(f, ";" )?; |
233 | } |
234 | write!(f, " }}" )?; |
235 | |
236 | if i != self.rules.len() - 1 { |
237 | writeln!(f)?; |
238 | } |
239 | } |
240 | |
241 | Ok(()) |
242 | } |
243 | } |
244 | |
245 | fn consume_statement<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
246 | if s.curr_byte() == Ok(b'@' ) { |
247 | s.advance(1); |
248 | consume_at_rule(s) |
249 | } else { |
250 | consume_rule_set(s, rules) |
251 | } |
252 | } |
253 | |
254 | fn consume_at_rule(s: &mut Stream) -> Result<(), Error> { |
255 | let ident: &str = s.consume_ident()?; |
256 | warn!("The @ {} rule is not supported. Skipped." , ident); |
257 | |
258 | s.skip_bytes(|c: u8| c != b';' && c != b'{' ); |
259 | |
260 | match s.curr_byte()? { |
261 | b';' => s.advance(1), |
262 | b'{' => consume_block(s), |
263 | _ => {} |
264 | } |
265 | |
266 | Ok(()) |
267 | } |
268 | |
269 | fn consume_rule_set<'a>(s: &mut Stream<'a>, rules: &mut Vec<Rule<'a>>) -> Result<(), Error> { |
270 | let start_rule_idx = rules.len(); |
271 | |
272 | while s.curr_byte()? == b',' || start_rule_idx == rules.len() { |
273 | if s.curr_byte()? == b',' { |
274 | s.advance(1); |
275 | } |
276 | |
277 | let (selector, offset) = crate::selector::parse(s.slice_tail()); |
278 | s.advance(offset); |
279 | s.skip_spaces(); |
280 | |
281 | if let Some(selector) = selector { |
282 | rules.push(Rule { selector, declarations: Vec::new() }); |
283 | } |
284 | |
285 | match s.curr_byte()? { |
286 | b'{' => break, |
287 | b',' => {} |
288 | _ => { |
289 | s.skip_bytes(|c| c != b'{' ); |
290 | break; |
291 | } |
292 | } |
293 | } |
294 | |
295 | s.try_consume_byte(b'{' ); |
296 | |
297 | let declarations = consume_declarations(s)?; |
298 | for i in start_rule_idx..rules.len() { |
299 | rules[i].declarations = declarations.clone(); |
300 | } |
301 | |
302 | s.try_consume_byte(b'}' ); |
303 | |
304 | Ok(()) |
305 | } |
306 | |
307 | fn consume_block(s: &mut Stream) { |
308 | s.try_consume_byte(b'{' ); |
309 | consume_until_block_end(s); |
310 | } |
311 | |
312 | fn consume_until_block_end(s: &mut Stream) { |
313 | // Block can have nested blocks, so we have to check for matching braces. |
314 | // We simply counting the number of opening braces, which is incorrect, |
315 | // since `{` can be inside a string, but it's fine for majority of the cases. |
316 | |
317 | let mut braces = 0; |
318 | while !s.at_end() { |
319 | match s.curr_byte_unchecked() { |
320 | b'{' => { |
321 | braces += 1; |
322 | } |
323 | b'}' => { |
324 | if braces == 0 { |
325 | break; |
326 | } else { |
327 | braces -= 1; |
328 | } |
329 | } |
330 | _ => {} |
331 | } |
332 | |
333 | s.advance(1); |
334 | } |
335 | |
336 | s.try_consume_byte(b'}' ); |
337 | } |
338 | |
339 | fn consume_declarations<'a>(s: &mut Stream<'a>) -> Result<Vec<Declaration<'a>>, Error> { |
340 | let mut declarations: Vec> = Vec::new(); |
341 | |
342 | while !s.at_end() && s.curr_byte() != Ok(b'}' ) { |
343 | match consume_declaration(s) { |
344 | Ok(declaration: Declaration<'_>) => declarations.push(declaration), |
345 | Err(_) => { |
346 | consume_until_block_end(s); |
347 | break; |
348 | } |
349 | } |
350 | } |
351 | |
352 | Ok(declarations) |
353 | } |
354 | |
355 | |
356 | /// A declaration tokenizer. |
357 | /// |
358 | /// Tokenizer will stop at the first invalid token. |
359 | /// |
360 | /// # Example |
361 | /// |
362 | /// ``` |
363 | /// use simplecss::{DeclarationTokenizer, Declaration}; |
364 | /// |
365 | /// let mut t = DeclarationTokenizer::from("background: url( \"img.png \"); color:red !important" ); |
366 | /// assert_eq!(t.next().unwrap(), Declaration { name: "background" , value: "url( \"img.png \")" , important: false }); |
367 | /// assert_eq!(t.next().unwrap(), Declaration { name: "color" , value: "red" , important: true }); |
368 | /// ``` |
369 | pub struct DeclarationTokenizer<'a> { |
370 | stream: Stream<'a>, |
371 | } |
372 | |
373 | impl<'a> From<&'a str> for DeclarationTokenizer<'a> { |
374 | fn from(text: &'a str) -> Self { |
375 | DeclarationTokenizer { |
376 | stream: Stream::from(text), |
377 | } |
378 | } |
379 | } |
380 | |
381 | impl<'a> Iterator for DeclarationTokenizer<'a> { |
382 | type Item = Declaration<'a>; |
383 | |
384 | fn next(&mut self) -> Option<Self::Item> { |
385 | let _ = self.stream.skip_spaces_and_comments(); |
386 | |
387 | if self.stream.at_end() { |
388 | return None; |
389 | } |
390 | |
391 | match consume_declaration(&mut self.stream) { |
392 | Ok(v: Declaration<'_>) => Some(v), |
393 | Err(_) => { |
394 | self.stream.jump_to_end(); |
395 | None |
396 | } |
397 | } |
398 | } |
399 | } |
400 | |
401 | fn consume_declaration<'a>(s: &mut Stream<'a>) -> Result<Declaration<'a>, Error> { |
402 | s.skip_spaces_and_comments()?; |
403 | |
404 | // Parse name. |
405 | |
406 | // https://snook.ca/archives/html_and_css/targetting_ie7 |
407 | if s.curr_byte() == Ok(b'*' ) { |
408 | s.advance(1); |
409 | } |
410 | |
411 | let name = s.consume_ident()?; |
412 | |
413 | s.skip_spaces_and_comments()?; |
414 | s.consume_byte(b':' )?; |
415 | s.skip_spaces_and_comments()?; |
416 | |
417 | // Parse value. |
418 | let start = s.pos(); |
419 | let mut end = s.pos(); |
420 | while consume_term(s).is_ok() { |
421 | end = s.pos(); |
422 | s.skip_spaces_and_comments()?; |
423 | } |
424 | let value = s.slice_range(start, end).trim(); |
425 | |
426 | s.skip_spaces_and_comments()?; |
427 | |
428 | // Check for `important`. |
429 | let mut important = false; |
430 | if s.curr_byte() == Ok(b'!' ) { |
431 | s.advance(1); |
432 | s.skip_spaces_and_comments()?; |
433 | if s.slice_tail().starts_with("important" ) { |
434 | s.advance(9); |
435 | important = true; |
436 | } |
437 | } |
438 | |
439 | s.skip_spaces_and_comments()?; |
440 | |
441 | while s.curr_byte() == Ok(b';' ) { |
442 | s.advance(1); |
443 | s.skip_spaces_and_comments()?; |
444 | } |
445 | |
446 | s.skip_spaces_and_comments()?; |
447 | |
448 | if value.is_empty() { |
449 | return Err(Error::InvalidValue(s.gen_text_pos_from(start))); |
450 | } |
451 | |
452 | Ok(Declaration { name, value, important }) |
453 | } |
454 | |
455 | fn consume_term(s: &mut Stream) -> Result<(), Error> { |
456 | fn consume_digits(s: &mut Stream) { |
457 | while let Ok(c) = s.curr_byte() { |
458 | match c { |
459 | b'0' ..=b'9' => s.advance(1), |
460 | _ => break, |
461 | } |
462 | } |
463 | } |
464 | |
465 | match s.curr_byte()? { |
466 | b'#' => { |
467 | s.advance(1); |
468 | match s.consume_ident() { |
469 | Ok(_) => {} |
470 | Err(_) => { |
471 | // Try consume as a hex color. |
472 | while let Ok(c) = s.curr_byte() { |
473 | match c { |
474 | b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' => s.advance(1), |
475 | _ => break, |
476 | } |
477 | } |
478 | } |
479 | } |
480 | } |
481 | b'+' | b'-' | b'0' ..=b'9' | b'.' => { |
482 | // Consume number. |
483 | |
484 | s.advance(1); |
485 | consume_digits(s); |
486 | if s.curr_byte() == Ok(b'.' ) { |
487 | s.advance(1); |
488 | consume_digits(s); |
489 | } |
490 | |
491 | if s.curr_byte() == Ok(b'%' ) { |
492 | s.advance(1); |
493 | } else { |
494 | // Consume suffix if any. |
495 | let _ = s.consume_ident(); |
496 | } |
497 | } |
498 | b' \'' | b'"' => { |
499 | s.consume_string()?; |
500 | } |
501 | b',' => { |
502 | s.advance(1); |
503 | } |
504 | _ => { |
505 | let _ = s.consume_ident()?; |
506 | |
507 | // Consume function. |
508 | if s.curr_byte() == Ok(b'(' ) { |
509 | s.skip_bytes(|c| c != b')' ); |
510 | s.consume_byte(b')' )?; |
511 | } |
512 | } |
513 | } |
514 | |
515 | Ok(()) |
516 | } |
517 | |