1 | // Copyright 2015 Google Inc. All rights reserved. |
2 | // |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | // of this software and associated documentation files (the "Software"), to deal |
5 | // in the Software without restriction, including without limitation the rights |
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | // copies of the Software, and to permit persons to whom the Software is |
8 | // furnished to do so, subject to the following conditions: |
9 | // |
10 | // The above copyright notice and this permission notice shall be included in |
11 | // all copies or substantial portions of the Software. |
12 | // |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | // THE SOFTWARE. |
20 | |
21 | //! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct |
22 | //! which is an iterator over [Event](enum.Event.html)s. This iterator can be used |
23 | //! directly, or to output HTML using the [HTML module](html/index.html). |
24 | //! |
25 | //! By default, only CommonMark features are enabled. To use extensions like tables, |
26 | //! footnotes or task lists, enable them by setting the corresponding flags in the |
27 | //! [Options](struct.Options.html) struct. |
28 | //! |
29 | //! # Example |
30 | //! ```rust |
31 | //! use pulldown_cmark::{Parser, Options, html}; |
32 | //! |
33 | //! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example." ; |
34 | //! |
35 | //! // Set up options and parser. Strikethroughs are not part of the CommonMark standard |
36 | //! // and we therefore must enable it explicitly. |
37 | //! let mut options = Options::empty(); |
38 | //! options.insert(Options::ENABLE_STRIKETHROUGH); |
39 | //! let parser = Parser::new_ext(markdown_input, options); |
40 | //! |
41 | //! // Write to String buffer. |
42 | //! let mut html_output = String::new(); |
43 | //! html::push_html(&mut html_output, parser); |
44 | //! |
45 | //! // Check that the output is what we expected. |
//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
47 | //! assert_eq!(expected_html, &html_output); |
48 | //! ``` |
49 | |
50 | // When compiled for the rustc compiler itself we want to make sure that this is |
51 | // an unstable crate. |
52 | #![cfg_attr (rustbuild, feature(staged_api, rustc_private))] |
53 | #![cfg_attr (rustbuild, unstable(feature = "rustc_private" , issue = "27812" ))] |
54 | // Forbid unsafe code unless the SIMD feature is enabled. |
55 | #![cfg_attr (not(feature = "simd" ), forbid(unsafe_code))] |
56 | #![warn (missing_debug_implementations)] |
57 | |
58 | #[cfg (feature = "serde" )] |
59 | use serde::{Deserialize, Serialize}; |
60 | |
61 | pub mod html; |
62 | |
63 | mod entities; |
64 | pub mod escape; |
65 | mod firstpass; |
66 | mod linklabel; |
67 | mod parse; |
68 | mod puncttable; |
69 | mod scanners; |
70 | mod strings; |
71 | mod tree; |
72 | |
73 | use std::{convert::TryFrom, fmt::Display}; |
74 | |
75 | pub use crate::parse::{BrokenLink, BrokenLinkCallback, LinkDef, OffsetIter, Parser, RefDefs}; |
76 | pub use crate::strings::{CowStr, InlineStr}; |
77 | |
78 | /// Codeblock kind. |
79 | #[derive (Clone, Debug, PartialEq)] |
80 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
81 | pub enum CodeBlockKind<'a> { |
82 | Indented, |
83 | /// The value contained in the tag describes the language of the code, which may be empty. |
84 | #[cfg_attr (feature = "serde" , serde(borrow))] |
85 | Fenced(CowStr<'a>), |
86 | } |
87 | |
88 | impl<'a> CodeBlockKind<'a> { |
89 | pub fn is_indented(&self) -> bool { |
90 | matches!(*self, CodeBlockKind::Indented) |
91 | } |
92 | |
93 | pub fn is_fenced(&self) -> bool { |
94 | matches!(*self, CodeBlockKind::Fenced(_)) |
95 | } |
96 | } |
97 | |
98 | /// Tags for elements that can contain other elements. |
99 | #[derive (Clone, Debug, PartialEq)] |
100 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
101 | pub enum Tag<'a> { |
102 | /// A paragraph of text and other inline elements. |
103 | Paragraph, |
104 | |
105 | /// A heading. The first field indicates the level of the heading, |
106 | /// the second the fragment identifier, and the third the classes. |
107 | Heading(HeadingLevel, Option<&'a str>, Vec<&'a str>), |
108 | |
109 | BlockQuote, |
110 | /// A code block. |
111 | CodeBlock(CodeBlockKind<'a>), |
112 | |
113 | /// A list. If the list is ordered the field indicates the number of the first item. |
114 | /// Contains only list items. |
115 | List(Option<u64>), // TODO: add delim and tight for ast (not needed for html) |
116 | /// A list item. |
117 | Item, |
118 | /// A footnote definition. The value contained is the footnote's label by which it can |
119 | /// be referred to. |
120 | #[cfg_attr (feature = "serde" , serde(borrow))] |
121 | FootnoteDefinition(CowStr<'a>), |
122 | |
123 | /// A table. Contains a vector describing the text-alignment for each of its columns. |
124 | Table(Vec<Alignment>), |
125 | /// A table header. Contains only `TableCell`s. Note that the table body starts immediately |
126 | /// after the closure of the `TableHead` tag. There is no `TableBody` tag. |
127 | TableHead, |
128 | /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s. |
129 | TableRow, |
130 | TableCell, |
131 | |
132 | // span-level tags |
133 | Emphasis, |
134 | Strong, |
135 | Strikethrough, |
136 | |
137 | /// A link. The first field is the link type, the second the destination URL and the third is a title. |
138 | Link(LinkType, CowStr<'a>, CowStr<'a>), |
139 | |
140 | /// An image. The first field is the link type, the second the destination URL and the third is a title. |
141 | Image(LinkType, CowStr<'a>, CowStr<'a>), |
142 | } |
143 | |
/// The level of a heading, from `H1` through `H6`.
///
/// Each variant's discriminant equals its numeric level.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2 = 2,
    H3 = 3,
    H4 = 4,
    H5 = 5,
    H6 = 6,
}
154 | |
155 | impl Display for HeadingLevel { |
156 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
157 | match self { |
158 | Self::H1 => write!(f, "h1" ), |
159 | Self::H2 => write!(f, "h2" ), |
160 | Self::H3 => write!(f, "h3" ), |
161 | Self::H4 => write!(f, "h4" ), |
162 | Self::H5 => write!(f, "h5" ), |
163 | Self::H6 => write!(f, "h6" ), |
164 | } |
165 | } |
166 | } |
167 | |
/// Returned when converting a `usize` into a [`HeadingLevel`] fails because
/// the value isn't a valid heading level.
///
/// The wrapped field is the rejected value. The type implements
/// [`std::fmt::Display`] and [`std::error::Error`], so it can be propagated
/// with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl std::fmt::Display for InvalidHeadingLevel {
    /// Writes a short message that includes the rejected value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid heading level: {}", self.0)
    }
}

// No underlying cause to report, so the default `source()` (`None`) is kept.
impl std::error::Error for InvalidHeadingLevel {}
172 | |
173 | impl TryFrom<usize> for HeadingLevel { |
174 | type Error = InvalidHeadingLevel; |
175 | |
176 | fn try_from(value: usize) -> Result<Self, Self::Error> { |
177 | match value { |
178 | 1 => Ok(Self::H1), |
179 | 2 => Ok(Self::H2), |
180 | 3 => Ok(Self::H3), |
181 | 4 => Ok(Self::H4), |
182 | 5 => Ok(Self::H5), |
183 | 6 => Ok(Self::H6), |
184 | _ => Err(InvalidHeadingLevel(value)), |
185 | } |
186 | } |
187 | } |
188 | |
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link, e.g. `[foo](bar)`.
    Inline,
    /// Reference link, e.g. `[foo][bar]`.
    Reference,
    /// Reference link with no destination in the document, resolved by the
    /// broken_link_callback instead.
    ReferenceUnknown,
    /// Collapsed link, e.g. `[foo][]`.
    Collapsed,
    /// Collapsed link with no destination in the document, resolved by the
    /// broken_link_callback instead.
    CollapsedUnknown,
    /// Shortcut link, e.g. `[foo]`.
    Shortcut,
    /// Shortcut link with no destination in the document, resolved by the
    /// broken_link_callback instead.
    ShortcutUnknown,
    /// Autolink, e.g. `<http://foo.bar/baz>`.
    Autolink,
    /// Email address in an autolink, e.g. `<john@example.org>`.
    Email,
}
212 | |
213 | impl LinkType { |
214 | fn to_unknown(self) -> Self { |
215 | match self { |
216 | LinkType::Reference => LinkType::ReferenceUnknown, |
217 | LinkType::Collapsed => LinkType::CollapsedUnknown, |
218 | LinkType::Shortcut => LinkType::ShortcutUnknown, |
219 | _ => unreachable!(), |
220 | } |
221 | } |
222 | } |
223 | |
224 | /// Markdown events that are generated in a preorder traversal of the document |
225 | /// tree, with additional `End` events whenever all of an inner node's children |
226 | /// have been visited. |
227 | #[derive (Clone, Debug, PartialEq)] |
228 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
229 | pub enum Event<'a> { |
230 | /// Start of a tagged element. Events that are yielded after this event |
231 | /// and before its corresponding `End` event are inside this element. |
232 | /// Start and end events are guaranteed to be balanced. |
233 | #[cfg_attr (feature = "serde" , serde(borrow))] |
234 | Start(Tag<'a>), |
235 | /// End of a tagged element. |
236 | #[cfg_attr (feature = "serde" , serde(borrow))] |
237 | End(Tag<'a>), |
238 | /// A text node. |
239 | #[cfg_attr (feature = "serde" , serde(borrow))] |
240 | Text(CowStr<'a>), |
241 | /// An inline code node. |
242 | #[cfg_attr (feature = "serde" , serde(borrow))] |
243 | Code(CowStr<'a>), |
244 | /// An HTML node. |
245 | #[cfg_attr (feature = "serde" , serde(borrow))] |
246 | Html(CowStr<'a>), |
247 | /// A reference to a footnote with given label, which may or may not be defined |
248 | /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may |
249 | /// occur in any order. |
250 | #[cfg_attr (feature = "serde" , serde(borrow))] |
251 | FootnoteReference(CowStr<'a>), |
252 | /// A soft line break. |
253 | SoftBreak, |
254 | /// A hard line break. |
255 | HardBreak, |
256 | /// A horizontal ruler. |
257 | Rule, |
258 | /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked. |
259 | TaskListMarker(bool), |
260 | } |
261 | |
/// Text alignment of a table column.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned text.
    Left,
    /// Centered text.
    Center,
    /// Right-aligned text.
    Right,
}
273 | |
274 | bitflags::bitflags! { |
275 | /// Option struct containing flags for enabling extra features |
276 | /// that are not part of the CommonMark spec. |
277 | #[derive (Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
278 | pub struct Options: u32 { |
279 | const ENABLE_TABLES = 1 << 1; |
280 | const ENABLE_FOOTNOTES = 1 << 2; |
281 | const ENABLE_STRIKETHROUGH = 1 << 3; |
282 | const ENABLE_TASKLISTS = 1 << 4; |
283 | const ENABLE_SMART_PUNCTUATION = 1 << 5; |
284 | /// Extension to allow headings to have ID and classes. |
285 | /// |
286 | /// `# text { #id .class1 .class2 }` is interpreted as a level 1 heading |
287 | /// with the content `text`, ID `id`, and classes `class1` and `class2`. |
288 | /// Note that attributes (ID and classes) should be space-separated. |
289 | const ENABLE_HEADING_ATTRIBUTES = 1 << 6; |
290 | } |
291 | } |
292 | |