| 1 | // Copyright 2015 Google Inc. All rights reserved. |
| 2 | // |
| 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 | // of this software and associated documentation files (the "Software"), to deal |
| 5 | // in the Software without restriction, including without limitation the rights |
| 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 7 | // copies of the Software, and to permit persons to whom the Software is |
| 8 | // furnished to do so, subject to the following conditions: |
| 9 | // |
| 10 | // The above copyright notice and this permission notice shall be included in |
| 11 | // all copies or substantial portions of the Software. |
| 12 | // |
| 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 19 | // THE SOFTWARE. |
| 20 | |
| 21 | //! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct |
| 22 | //! which is an iterator over [Event](enum.Event.html)s. This iterator can be used |
| 23 | //! directly, or to output HTML using the [HTML module](html/index.html). |
| 24 | //! |
| 25 | //! By default, only CommonMark features are enabled. To use extensions like tables, |
| 26 | //! footnotes or task lists, enable them by setting the corresponding flags in the |
| 27 | //! [Options](struct.Options.html) struct. |
| 28 | //! |
| 29 | //! # Example |
| 30 | //! ```rust |
| 31 | //! use pulldown_cmark::{Parser, Options, html}; |
| 32 | //! |
| 33 | //! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example." ; |
| 34 | //! |
| 35 | //! // Set up options and parser. Strikethroughs are not part of the CommonMark standard |
| 36 | //! // and we therefore must enable it explicitly. |
| 37 | //! let mut options = Options::empty(); |
| 38 | //! options.insert(Options::ENABLE_STRIKETHROUGH); |
| 39 | //! let parser = Parser::new_ext(markdown_input, options); |
| 40 | //! |
| 41 | //! // Write to String buffer. |
| 42 | //! let mut html_output = String::new(); |
| 43 | //! html::push_html(&mut html_output, parser); |
| 44 | //! |
| 45 | //! // Check that the output is what we expected. |
//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
| 47 | //! assert_eq!(expected_html, &html_output); |
| 48 | //! ``` |
| 49 | |
| 50 | // When compiled for the rustc compiler itself we want to make sure that this is |
| 51 | // an unstable crate. |
| 52 | #![cfg_attr (rustbuild, feature(staged_api, rustc_private))] |
| 53 | #![cfg_attr (rustbuild, unstable(feature = "rustc_private" , issue = "27812" ))] |
| 54 | // Forbid unsafe code unless the SIMD feature is enabled. |
| 55 | #![cfg_attr (not(feature = "simd" ), forbid(unsafe_code))] |
| 56 | #![warn (missing_debug_implementations)] |
| 57 | |
| 58 | #[cfg (feature = "serde" )] |
| 59 | use serde::{Deserialize, Serialize}; |
| 60 | |
| 61 | pub mod html; |
| 62 | |
| 63 | mod entities; |
| 64 | pub mod escape; |
| 65 | mod firstpass; |
| 66 | mod linklabel; |
| 67 | mod parse; |
| 68 | mod puncttable; |
| 69 | mod scanners; |
| 70 | mod strings; |
| 71 | mod tree; |
| 72 | |
| 73 | use std::{convert::TryFrom, fmt::Display}; |
| 74 | |
| 75 | pub use crate::parse::{BrokenLink, BrokenLinkCallback, LinkDef, OffsetIter, Parser, RefDefs}; |
| 76 | pub use crate::strings::{CowStr, InlineStr}; |
| 77 | |
| 78 | /// Codeblock kind. |
| 79 | #[derive (Clone, Debug, PartialEq)] |
| 80 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
| 81 | pub enum CodeBlockKind<'a> { |
| 82 | Indented, |
| 83 | /// The value contained in the tag describes the language of the code, which may be empty. |
| 84 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 85 | Fenced(CowStr<'a>), |
| 86 | } |
| 87 | |
| 88 | impl<'a> CodeBlockKind<'a> { |
| 89 | pub fn is_indented(&self) -> bool { |
| 90 | matches!(*self, CodeBlockKind::Indented) |
| 91 | } |
| 92 | |
| 93 | pub fn is_fenced(&self) -> bool { |
| 94 | matches!(*self, CodeBlockKind::Fenced(_)) |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | /// Tags for elements that can contain other elements. |
| 99 | #[derive (Clone, Debug, PartialEq)] |
| 100 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
| 101 | pub enum Tag<'a> { |
| 102 | /// A paragraph of text and other inline elements. |
| 103 | Paragraph, |
| 104 | |
| 105 | /// A heading. The first field indicates the level of the heading, |
| 106 | /// the second the fragment identifier, and the third the classes. |
| 107 | Heading(HeadingLevel, Option<&'a str>, Vec<&'a str>), |
| 108 | |
| 109 | BlockQuote, |
| 110 | /// A code block. |
| 111 | CodeBlock(CodeBlockKind<'a>), |
| 112 | |
| 113 | /// A list. If the list is ordered the field indicates the number of the first item. |
| 114 | /// Contains only list items. |
| 115 | List(Option<u64>), // TODO: add delim and tight for ast (not needed for html) |
| 116 | /// A list item. |
| 117 | Item, |
| 118 | /// A footnote definition. The value contained is the footnote's label by which it can |
| 119 | /// be referred to. |
| 120 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 121 | FootnoteDefinition(CowStr<'a>), |
| 122 | |
| 123 | /// A table. Contains a vector describing the text-alignment for each of its columns. |
| 124 | Table(Vec<Alignment>), |
| 125 | /// A table header. Contains only `TableCell`s. Note that the table body starts immediately |
| 126 | /// after the closure of the `TableHead` tag. There is no `TableBody` tag. |
| 127 | TableHead, |
| 128 | /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s. |
| 129 | TableRow, |
| 130 | TableCell, |
| 131 | |
| 132 | // span-level tags |
| 133 | Emphasis, |
| 134 | Strong, |
| 135 | Strikethrough, |
| 136 | |
| 137 | /// A link. The first field is the link type, the second the destination URL and the third is a title. |
| 138 | Link(LinkType, CowStr<'a>, CowStr<'a>), |
| 139 | |
| 140 | /// An image. The first field is the link type, the second the destination URL and the third is a title. |
| 141 | Image(LinkType, CowStr<'a>, CowStr<'a>), |
| 142 | } |
| 143 | |
/// The level of a heading, from `H1` to `H6`.
///
/// The discriminants run from 1 to 6, so casting a variant to an integer
/// yields its numeric level.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

impl Display for HeadingLevel {
    /// Writes the lowercase name of the level: `h1` through `h6`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        f.write_str(name)
    }
}

/// Returned when trying to convert a `usize` into a [`HeadingLevel`] but it
/// fails because the `usize` isn't a valid heading level.
///
/// The wrapped value is the number that was rejected.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl Display for InvalidHeadingLevel {
    /// Writes a human-readable message that includes the rejected value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid heading level: {} (expected 1 through 6)", self.0)
    }
}

// Implementing `Error` lets callers propagate a failed conversion with `?`
// into `Box<dyn Error>` and similar error containers.
impl std::error::Error for InvalidHeadingLevel {}

impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    /// Converts a numeric level into a `HeadingLevel`.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidHeadingLevel`] carrying `value` when it is outside
    /// the range `1..=6`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        match value {
            1 => Ok(Self::H1),
            2 => Ok(Self::H2),
            3 => Ok(Self::H3),
            4 => Ok(Self::H4),
            5 => Ok(Self::H5),
            6 => Ok(Self::H6),
            _ => Err(InvalidHeadingLevel(value)),
        }
    }
}
| 188 | |
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference link whose destination is not in the document, but which was
    /// resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link whose destination is not in the document, but which was
    /// resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut link whose destination is not in the document, but which was
    /// resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
}

impl LinkType {
    /// Maps `Reference`, `Collapsed` and `Shortcut` to their `*Unknown`
    /// counterparts.
    ///
    /// # Panics
    ///
    /// Reaches `unreachable!()` when called on any other variant.
    fn to_unknown(self) -> Self {
        match self {
            Self::Reference => Self::ReferenceUnknown,
            Self::Collapsed => Self::CollapsedUnknown,
            Self::Shortcut => Self::ShortcutUnknown,
            // Only the three link types above have an `*Unknown` counterpart.
            Self::Inline
            | Self::ReferenceUnknown
            | Self::CollapsedUnknown
            | Self::ShortcutUnknown
            | Self::Autolink
            | Self::Email => unreachable!(),
        }
    }
}
| 223 | |
| 224 | /// Markdown events that are generated in a preorder traversal of the document |
| 225 | /// tree, with additional `End` events whenever all of an inner node's children |
| 226 | /// have been visited. |
| 227 | #[derive (Clone, Debug, PartialEq)] |
| 228 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
| 229 | pub enum Event<'a> { |
| 230 | /// Start of a tagged element. Events that are yielded after this event |
| 231 | /// and before its corresponding `End` event are inside this element. |
| 232 | /// Start and end events are guaranteed to be balanced. |
| 233 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 234 | Start(Tag<'a>), |
| 235 | /// End of a tagged element. |
| 236 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 237 | End(Tag<'a>), |
| 238 | /// A text node. |
| 239 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 240 | Text(CowStr<'a>), |
| 241 | /// An inline code node. |
| 242 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 243 | Code(CowStr<'a>), |
| 244 | /// An HTML node. |
| 245 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 246 | Html(CowStr<'a>), |
| 247 | /// A reference to a footnote with given label, which may or may not be defined |
| 248 | /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may |
| 249 | /// occur in any order. |
| 250 | #[cfg_attr (feature = "serde" , serde(borrow))] |
| 251 | FootnoteReference(CowStr<'a>), |
| 252 | /// A soft line break. |
| 253 | SoftBreak, |
| 254 | /// A hard line break. |
| 255 | HardBreak, |
| 256 | /// A horizontal ruler. |
| 257 | Rule, |
| 258 | /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked. |
| 259 | TaskListMarker(bool), |
| 260 | } |
| 261 | |
/// Table column text alignment.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left text alignment.
    Left,
    /// Centered text alignment.
    Center,
    /// Right text alignment.
    Right,
}
| 273 | |
| 274 | bitflags::bitflags! { |
| 275 | /// Option struct containing flags for enabling extra features |
| 276 | /// that are not part of the CommonMark spec. |
| 277 | #[derive (Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
| 278 | pub struct Options: u32 { |
| 279 | const ENABLE_TABLES = 1 << 1; |
| 280 | const ENABLE_FOOTNOTES = 1 << 2; |
| 281 | const ENABLE_STRIKETHROUGH = 1 << 3; |
| 282 | const ENABLE_TASKLISTS = 1 << 4; |
| 283 | const ENABLE_SMART_PUNCTUATION = 1 << 5; |
| 284 | /// Extension to allow headings to have ID and classes. |
| 285 | /// |
| 286 | /// `# text { #id .class1 .class2 }` is interpreted as a level 1 heading |
| 287 | /// with the content `text`, ID `id`, and classes `class1` and `class2`. |
| 288 | /// Note that attributes (ID and classes) should be space-separated. |
| 289 | const ENABLE_HEADING_ATTRIBUTES = 1 << 6; |
| 290 | } |
| 291 | } |
| 292 | |