1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to output HTML using the [HTML module](html/index.html).
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use pulldown_cmark::{Parser, Options, html};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! // Set up options and parser. Strikethroughs are not part of the CommonMark standard
36//! // and we therefore must enable it explicitly.
37//! let mut options = Options::empty();
38//! options.insert(Options::ENABLE_STRIKETHROUGH);
39//! let parser = Parser::new_ext(markdown_input, options);
40//!
41//! // Write to String buffer.
42//! let mut html_output = String::new();
43//! html::push_html(&mut html_output, parser);
44//!
45//! // Check that the output is what we expected.
46//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
47//! assert_eq!(expected_html, &html_output);
48//! ```
49
50// When compiled for the rustc compiler itself we want to make sure that this is
51// an unstable crate.
52#![cfg_attr(rustbuild, feature(staged_api, rustc_private))]
53#![cfg_attr(rustbuild, unstable(feature = "rustc_private", issue = "27812"))]
54// Forbid unsafe code unless the SIMD feature is enabled.
55#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
56#![warn(missing_debug_implementations)]
57
58#[cfg(feature = "serde")]
59use serde::{Deserialize, Serialize};
60
61pub mod html;
62
63mod entities;
64pub mod escape;
65mod firstpass;
66mod linklabel;
67mod parse;
68mod puncttable;
69mod scanners;
70mod strings;
71mod tree;
72
73use std::{convert::TryFrom, fmt::Display};
74
75pub use crate::parse::{BrokenLink, BrokenLinkCallback, LinkDef, OffsetIter, Parser, RefDefs};
76pub use crate::strings::{CowStr, InlineStr};
77
78/// Codeblock kind.
79#[derive(Clone, Debug, PartialEq)]
80#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
81pub enum CodeBlockKind<'a> {
82 Indented,
83 /// The value contained in the tag describes the language of the code, which may be empty.
84 #[cfg_attr(feature = "serde", serde(borrow))]
85 Fenced(CowStr<'a>),
86}
87
88impl<'a> CodeBlockKind<'a> {
89 pub fn is_indented(&self) -> bool {
90 matches!(*self, CodeBlockKind::Indented)
91 }
92
93 pub fn is_fenced(&self) -> bool {
94 matches!(*self, CodeBlockKind::Fenced(_))
95 }
96}
97
98/// Tags for elements that can contain other elements.
99#[derive(Clone, Debug, PartialEq)]
100#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
101pub enum Tag<'a> {
102 /// A paragraph of text and other inline elements.
103 Paragraph,
104
105 /// A heading. The first field indicates the level of the heading,
106 /// the second the fragment identifier, and the third the classes.
107 Heading(HeadingLevel, Option<&'a str>, Vec<&'a str>),
108
109 BlockQuote,
110 /// A code block.
111 CodeBlock(CodeBlockKind<'a>),
112
113 /// A list. If the list is ordered the field indicates the number of the first item.
114 /// Contains only list items.
115 List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
116 /// A list item.
117 Item,
118 /// A footnote definition. The value contained is the footnote's label by which it can
119 /// be referred to.
120 #[cfg_attr(feature = "serde", serde(borrow))]
121 FootnoteDefinition(CowStr<'a>),
122
123 /// A table. Contains a vector describing the text-alignment for each of its columns.
124 Table(Vec<Alignment>),
125 /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
126 /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
127 TableHead,
128 /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
129 TableRow,
130 TableCell,
131
132 // span-level tags
133 Emphasis,
134 Strong,
135 Strikethrough,
136
137 /// A link. The first field is the link type, the second the destination URL and the third is a title.
138 Link(LinkType, CowStr<'a>, CowStr<'a>),
139
140 /// An image. The first field is the link type, the second the destination URL and the third is a title.
141 Image(LinkType, CowStr<'a>, CowStr<'a>),
142}
143
/// The level of a heading, from `H1` through `H6`.
///
/// The discriminants equal the numeric level (`H1 as usize == 1`, ...,
/// `H6 as usize == 6`), and the derived ordering follows the same sequence.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

impl Display for HeadingLevel {
    /// Writes the lowercase name of the level: `h1` through `h6`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        f.write_str(name)
    }
}

/// Returned when converting a `usize` into a [`HeadingLevel`] fails because
/// the value is not a valid heading level (1 through 6).
///
/// The rejected value is kept inside the error and shown by its `Display`
/// implementation.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl Display for InvalidHeadingLevel {
    /// Writes a human-readable message that includes the rejected value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "invalid heading level {}: expected a value from 1 to 6",
            self.0
        )
    }
}

// Implementing `Error` lets callers propagate the failure with `?` into
// generic error types such as `Box<dyn Error>`.
impl std::error::Error for InvalidHeadingLevel {}

impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    /// Converts a numeric level into a [`HeadingLevel`].
    ///
    /// # Errors
    ///
    /// Returns [`InvalidHeadingLevel`] wrapping `value` when it is outside
    /// the range `1..=6`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        match value {
            1 => Ok(Self::H1),
            2 => Ok(Self::H2),
            3 => Ok(Self::H3),
            4 => Ok(Self::H4),
            5 => Ok(Self::H5),
            6 => Ok(Self::H6),
            _ => Err(InvalidHeadingLevel(value)),
        }
    }
}
188
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
///
/// The enum carries no data, so it is `Copy` and also derives `Eq` and
/// `Hash`, which allows it to be used as a key in hash-based collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
}

impl LinkType {
    /// Maps `Reference`, `Collapsed` and `Shortcut` to their `*Unknown`
    /// counterparts.
    ///
    /// # Panics
    ///
    /// Panics when called on any other variant; callers are expected to
    /// invoke this only on the three variants listed above.
    fn to_unknown(self) -> Self {
        match self {
            LinkType::Reference => LinkType::ReferenceUnknown,
            LinkType::Collapsed => LinkType::CollapsedUnknown,
            LinkType::Shortcut => LinkType::ShortcutUnknown,
            // Name the offending variant so a violated invariant is easy to trace.
            other => unreachable!("to_unknown called on unsupported link type {:?}", other),
        }
    }
}
223
224/// Markdown events that are generated in a preorder traversal of the document
225/// tree, with additional `End` events whenever all of an inner node's children
226/// have been visited.
227#[derive(Clone, Debug, PartialEq)]
228#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
229pub enum Event<'a> {
230 /// Start of a tagged element. Events that are yielded after this event
231 /// and before its corresponding `End` event are inside this element.
232 /// Start and end events are guaranteed to be balanced.
233 #[cfg_attr(feature = "serde", serde(borrow))]
234 Start(Tag<'a>),
235 /// End of a tagged element.
236 #[cfg_attr(feature = "serde", serde(borrow))]
237 End(Tag<'a>),
238 /// A text node.
239 #[cfg_attr(feature = "serde", serde(borrow))]
240 Text(CowStr<'a>),
241 /// An inline code node.
242 #[cfg_attr(feature = "serde", serde(borrow))]
243 Code(CowStr<'a>),
244 /// An HTML node.
245 #[cfg_attr(feature = "serde", serde(borrow))]
246 Html(CowStr<'a>),
247 /// A reference to a footnote with given label, which may or may not be defined
248 /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
249 /// occur in any order.
250 #[cfg_attr(feature = "serde", serde(borrow))]
251 FootnoteReference(CowStr<'a>),
252 /// A soft line break.
253 SoftBreak,
254 /// A hard line break.
255 HardBreak,
256 /// A horizontal ruler.
257 Rule,
258 /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
259 TaskListMarker(bool),
260}
261
/// Table column text alignment.
///
/// The enum carries no data, so it is `Copy` and also derives `Eq` and
/// `Hash`, which allows it to be used as a key in hash-based collections.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
273
274bitflags::bitflags! {
275 /// Option struct containing flags for enabling extra features
276 /// that are not part of the CommonMark spec.
277 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
278 pub struct Options: u32 {
279 const ENABLE_TABLES = 1 << 1;
280 const ENABLE_FOOTNOTES = 1 << 2;
281 const ENABLE_STRIKETHROUGH = 1 << 3;
282 const ENABLE_TASKLISTS = 1 << 4;
283 const ENABLE_SMART_PUNCTUATION = 1 << 5;
284 /// Extension to allow headings to have ID and classes.
285 ///
286 /// `# text { #id .class1 .class2 }` is interpreted as a level 1 heading
287 /// with the content `text`, ID `id`, and classes `class1` and `class2`.
288 /// Note that attributes (ID and classes) should be space-separated.
289 const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
290 }
291}
292