1#![allow(missing_docs)] // FIXME: Document this
2
3pub mod fs;
4mod string;
5pub(crate) mod toml_ext;
6use crate::errors::Error;
7use log::error;
8use once_cell::sync::Lazy;
9use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
10use regex::Regex;
11
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17pub use self::string::{
18 take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
19 take_rustdoc_include_lines,
20};
21
22/// Replaces multiple consecutive whitespace characters with a single space character.
23pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
24 static RE: Lazy<Regex> = Lazy::new(|| Regex::new(re:r"\s\s+").unwrap());
25 RE.replace_all(text, rep:" ")
26}
27
/// Convert the given string to a valid HTML element ID.
/// The only restriction is that the ID must not contain any ASCII whitespace.
///
/// Each character of `content` is handled as follows:
///
/// * alphanumeric characters, `_` and `-` are kept, with ASCII letters
///   lowercased (non-ASCII letters such as `Ü` are kept unchanged);
/// * whitespace characters are replaced by `-`;
/// * everything else is dropped.
pub fn normalize_id(content: &str) -> String {
    content
        .chars()
        .filter_map(|ch| {
            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
                Some(ch.to_ascii_lowercase())
            } else if ch.is_whitespace() {
                Some('-')
            } else {
                None
            }
        })
        .collect()
}
44
45/// Generate an ID for use with anchors which is derived from a "normalised"
46/// string.
47// This function should be made private when the deprecation expires.
48#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
49pub fn id_from_content(content: &str) -> String {
50 let mut content: String = content.to_string();
51
52 // Skip any tags or html-encoded stuff
53 static HTML: Lazy<Regex> = Lazy::new(|| Regex::new(re:r"(<.*?>)").unwrap());
54 content = HTML.replace_all(&content, rep:"").into();
55 const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
56 for sub: &&str in REPL_SUB {
57 content = content.replace(from:sub, to:"");
58 }
59
60 // Remove spaces and hashes indicating a header
61 let trimmed: &str = content.trim().trim_start_matches('#').trim();
62 normalize_id(content:trimmed)
63}
64
65/// Generate an ID for use with anchors which is derived from a "normalised"
66/// string.
67///
68/// Each ID returned will be unique, if the same `id_counter` is provided on
69/// each call.
70pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
71 let id: String = {
72 #[allow(deprecated)]
73 id_from_content(content)
74 };
75
76 // If we have headers with the same normalized id, append an incrementing counter
77 let id_count: &mut usize = id_counter.entry(id.clone()).or_insert(default:0);
78 let unique_id: String = match *id_count {
79 0 => id,
80 id_count: usize => format!("{}-{}", id, id_count),
81 };
82 *id_count += 1;
83 unique_id
84}
85
86/// Fix links to the correct location.
87///
88/// This adjusts links, such as turning `.md` extensions to `.html`.
89///
90/// `path` is the path to the page being rendered relative to the root of the
91/// book. This is used for the `print.html` page so that links on the print
92/// page go to the original location. Normal page rendering sets `path` to
93/// None. Ideally, print page links would link to anchors on the print page,
94/// but that is very difficult.
95fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
96 static SCHEME_LINK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
97 static MD_LINK: Lazy<Regex> =
98 Lazy::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
99
100 fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
101 if dest.starts_with('#') {
102 // Fragment-only link.
103 if let Some(path) = path {
104 let mut base = path.display().to_string();
105 if base.ends_with(".md") {
106 base.replace_range(base.len() - 3.., ".html");
107 }
108 return format!("{}{}", base, dest).into();
109 } else {
110 return dest;
111 }
112 }
113 // Don't modify links with schemes like `https`.
114 if !SCHEME_LINK.is_match(&dest) {
115 // This is a relative link, adjust it as necessary.
116 let mut fixed_link = String::new();
117 if let Some(path) = path {
118 let base = path
119 .parent()
120 .expect("path can't be empty")
121 .to_str()
122 .expect("utf-8 paths only");
123 if !base.is_empty() {
124 write!(fixed_link, "{}/", base).unwrap();
125 }
126 }
127
128 if let Some(caps) = MD_LINK.captures(&dest) {
129 fixed_link.push_str(&caps["link"]);
130 fixed_link.push_str(".html");
131 if let Some(anchor) = caps.name("anchor") {
132 fixed_link.push_str(anchor.as_str());
133 }
134 } else {
135 fixed_link.push_str(&dest);
136 };
137 return CowStr::from(fixed_link);
138 }
139 dest
140 }
141
142 fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
143 // This is a terrible hack, but should be reasonably reliable. Nobody
144 // should ever parse a tag with a regex. However, there isn't anything
145 // in Rust that I know of that is suitable for handling partial html
146 // fragments like those generated by pulldown_cmark.
147 //
148 // There are dozens of HTML tags/attributes that contain paths, so
149 // feel free to add more tags if desired; these are the only ones I
150 // care about right now.
151 static HTML_LINK: Lazy<Regex> =
152 Lazy::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
153
154 HTML_LINK
155 .replace_all(&html, |caps: &regex::Captures<'_>| {
156 let fixed = fix(caps[2].into(), path);
157 format!("{}{}\"", &caps[1], fixed)
158 })
159 .into_owned()
160 .into()
161 }
162
163 match event {
164 Event::Start(Tag::Link(link_type, dest, title)) => {
165 Event::Start(Tag::Link(link_type, fix(dest, path), title))
166 }
167 Event::Start(Tag::Image(link_type, dest, title)) => {
168 Event::Start(Tag::Image(link_type, fix(dest, path), title))
169 }
170 Event::Html(html) => Event::Html(fix_html(html, path)),
171 _ => event,
172 }
173}
174
175/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
176pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
177 render_markdown_with_path(text, curly_quotes, path:None)
178}
179
180pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
181 let mut opts: Options = Options::empty();
182 opts.insert(Options::ENABLE_TABLES);
183 opts.insert(Options::ENABLE_FOOTNOTES);
184 opts.insert(Options::ENABLE_STRIKETHROUGH);
185 opts.insert(Options::ENABLE_TASKLISTS);
186 opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
187 if curly_quotes {
188 opts.insert(Options::ENABLE_SMART_PUNCTUATION);
189 }
190 Parser::new_ext(text, options:opts)
191}
192
193pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
194 let mut s: String = String::with_capacity(text.len() * 3 / 2);
195 let p: Parser<'_, '_> = new_cmark_parser(text, curly_quotes);
196 let events: impl Iterator> = pimpl Iterator>
197 .map(clean_codeblock_headers)
198 .map(|event: Event<'_>| adjust_links(event, path))
199 .flat_map(|event: Event<'_>| {
200 let (a: Option>, b: Option>) = wrap_tables(event);
201 a.into_iter().chain(b)
202 });
203
204 html::push_html(&mut s, iter:events);
205 s
206}
207
208/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
209fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
210 match event {
211 Event::Start(Tag::Table(_)) => (
212 Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
213 Some(event),
214 ),
215 Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
216 _ => (Some(event), None),
217 }
218}
219
220fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
221 match event {
222 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info: &CowStr<'_>))) => {
223 let info: String = infoimpl Iterator
224 .chars()
225 .map(|x: char| match x {
226 ' ' | '\t' => ',',
227 _ => x,
228 })
229 .filter(|ch: &char| !ch.is_whitespace())
230 .collect();
231
232 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
233 }
234 _ => event,
235 }
236}
237
238/// Prints a "backtrace" of some `Error`.
239pub fn log_backtrace(e: &Error) {
240 error!("Error: {}", e);
241
242 for cause: &dyn Error in e.chain().skip(1) {
243 error!("\tCaused By: {}", cause);
244 }
245}
246
/// Replaces every `<` with `&lt;` and every `>` with `&gt;`.
///
/// All other characters, including `&`, are copied through unchanged.
pub(crate) fn bracket_escape(mut s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let needs_escape: &[char] = &['<', '>'];
    while let Some(next) = s.find(needs_escape) {
        // Copy the untouched text before the bracket, then its entity.
        escaped.push_str(&s[..next]);
        match s.as_bytes()[next] {
            b'<' => escaped.push_str("&lt;"),
            b'>' => escaped.push_str("&gt;"),
            _ => unreachable!(),
        }
        // Both brackets are one byte long, so `next + 1` is a char boundary.
        s = &s[next + 1..];
    }
    escaped.push_str(s);
    escaped
}
262
263#[cfg(test)]
264mod tests {
265 use super::bracket_escape;
266
267 mod render_markdown {
268 use super::super::render_markdown;
269
270 #[test]
271 fn preserves_external_links() {
272 assert_eq!(
273 render_markdown("[example](https://www.rust-lang.org/)", false),
274 "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
275 );
276 }
277
278 #[test]
279 fn it_can_adjust_markdown_links() {
280 assert_eq!(
281 render_markdown("[example](example.md)", false),
282 "<p><a href=\"example.html\">example</a></p>\n"
283 );
284 assert_eq!(
285 render_markdown("[example_anchor](example.md#anchor)", false),
286 "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
287 );
288
289 // this anchor contains 'md' inside of it
290 assert_eq!(
291 render_markdown("[phantom data](foo.html#phantomdata)", false),
292 "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
293 );
294 }
295
296 #[test]
297 fn it_can_wrap_tables() {
298 let src = r#"
299| Original | Punycode | Punycode + Encoding |
300|-----------------|-----------------|---------------------|
301| føø | f-5gaa | f_5gaa |
302"#;
303 let out = r#"
304<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
305<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
306</tbody></table>
307</div>
308"#.trim();
309 assert_eq!(render_markdown(src, false), out);
310 }
311
312 #[test]
313 fn it_can_keep_quotes_straight() {
314 assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
315 }
316
317 #[test]
318 fn it_can_make_quotes_curly_except_when_they_are_in_code() {
319 let input = r#"
320'one'
321```
322'two'
323```
324`'three'` 'four'"#;
325 let expected = r#"<p>‘one’</p>
326<pre><code>'two'
327</code></pre>
328<p><code>'three'</code> ‘four’</p>
329"#;
330 assert_eq!(render_markdown(input, true), expected);
331 }
332
333 #[test]
334 fn whitespace_outside_of_codeblock_header_is_preserved() {
335 let input = r#"
336some text with spaces
337```rust
338fn main() {
339// code inside is unchanged
340}
341```
342more text with spaces
343"#;
344
345 let expected = r#"<p>some text with spaces</p>
346<pre><code class="language-rust">fn main() {
347// code inside is unchanged
348}
349</code></pre>
350<p>more text with spaces</p>
351"#;
352 assert_eq!(render_markdown(input, false), expected);
353 assert_eq!(render_markdown(input, true), expected);
354 }
355
356 #[test]
357 fn rust_code_block_properties_are_passed_as_space_delimited_class() {
358 let input = r#"
359```rust,no_run,should_panic,property_3
360```
361"#;
362
363 let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
364"#;
365 assert_eq!(render_markdown(input, false), expected);
366 assert_eq!(render_markdown(input, true), expected);
367 }
368
369 #[test]
370 fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
371 let input = r#"
372```rust, no_run,,,should_panic , ,property_3
373```
374"#;
375
376 let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
377"#;
378 assert_eq!(render_markdown(input, false), expected);
379 assert_eq!(render_markdown(input, true), expected);
380 }
381
382 #[test]
383 fn rust_code_block_without_properties_has_proper_html_class() {
384 let input = r#"
385```rust
386```
387"#;
388
389 let expected = r#"<pre><code class="language-rust"></code></pre>
390"#;
391 assert_eq!(render_markdown(input, false), expected);
392 assert_eq!(render_markdown(input, true), expected);
393
394 let input = r#"
395```rust
396```
397"#;
398 assert_eq!(render_markdown(input, false), expected);
399 assert_eq!(render_markdown(input, true), expected);
400 }
401 }
402
403 #[allow(deprecated)]
404 mod id_from_content {
405 use super::super::id_from_content;
406
407 #[test]
408 fn it_generates_anchors() {
409 assert_eq!(
410 id_from_content("## Method-call expressions"),
411 "method-call-expressions"
412 );
413 assert_eq!(id_from_content("## **Bold** title"), "bold-title");
414 assert_eq!(id_from_content("## `Code` title"), "code-title");
415 assert_eq!(
416 id_from_content("## title <span dir=rtl>foo</span>"),
417 "title-foo"
418 );
419 }
420
421 #[test]
422 fn it_generates_anchors_from_non_ascii_initial() {
423 assert_eq!(
424 id_from_content("## `--passes`: add more rustdoc passes"),
425 "--passes-add-more-rustdoc-passes"
426 );
427 assert_eq!(
428 id_from_content("## 中文標題 CJK title"),
429 "中文標題-cjk-title"
430 );
431 assert_eq!(id_from_content("## Über"), "Über");
432 }
433 }
434
435 mod html_munging {
436 use super::super::{normalize_id, unique_id_from_content};
437
438 #[test]
439 fn it_normalizes_ids() {
440 assert_eq!(
441 normalize_id("`--passes`: add more rustdoc passes"),
442 "--passes-add-more-rustdoc-passes"
443 );
444 assert_eq!(
445 normalize_id("Method-call 🐙 expressions \u{1f47c}"),
446 "method-call--expressions-"
447 );
448 assert_eq!(normalize_id("_-_12345"), "_-_12345");
449 assert_eq!(normalize_id("12345"), "12345");
450 assert_eq!(normalize_id("中文"), "中文");
451 assert_eq!(normalize_id("にほんご"), "にほんご");
452 assert_eq!(normalize_id("한국어"), "한국어");
453 assert_eq!(normalize_id(""), "");
454 }
455
456 #[test]
457 fn it_generates_unique_ids_from_content() {
458 // Same id if not given shared state
459 assert_eq!(
460 unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
461 "中文標題-cjk-title"
462 );
463 assert_eq!(
464 unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
465 "中文標題-cjk-title"
466 );
467
468 // Different id if given shared state
469 let mut id_counter = Default::default();
470 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
471 assert_eq!(
472 unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
473 "中文標題-cjk-title"
474 );
475 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
476 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
477 }
478 }
479
480 #[test]
481 fn escaped_brackets() {
482 assert_eq!(bracket_escape(""), "");
483 assert_eq!(bracket_escape("<"), "&lt;");
484 assert_eq!(bracket_escape(">"), "&gt;");
485 assert_eq!(bracket_escape("<>"), "&lt;&gt;");
486 assert_eq!(bracket_escape("<test>"), "&lt;test&gt;");
487 assert_eq!(bracket_escape("a<test>b"), "a&lt;test&gt;b");
488 }
489}
490