1 | #![allow (missing_docs)] // FIXME: Document this |
2 | |
3 | pub mod fs; |
4 | mod string; |
5 | pub(crate) mod toml_ext; |
6 | use crate::errors::Error; |
7 | use log::error; |
8 | use once_cell::sync::Lazy; |
9 | use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag}; |
10 | use regex::Regex; |
11 | |
12 | use std::borrow::Cow; |
13 | use std::collections::HashMap; |
14 | use std::fmt::Write; |
15 | use std::path::Path; |
16 | |
17 | pub use self::string::{ |
18 | take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, |
19 | take_rustdoc_include_lines, |
20 | }; |
21 | |
22 | /// Replaces multiple consecutive whitespace characters with a single space character. |
23 | pub fn collapse_whitespace(text: &str) -> Cow<'_, str> { |
24 | static RE: Lazy<Regex> = Lazy::new(|| Regex::new(re:r"\s\s+" ).unwrap()); |
25 | RE.replace_all(text, rep:" " ) |
26 | } |
27 | |
/// Convert the given string to a valid HTML element ID.
/// The only restriction is that the ID must not contain any ASCII whitespace.
///
/// Each character of `content` is handled as follows:
///
/// * alphanumeric characters, `_` and `-` are kept, with ASCII letters
///   lowercased (non-ASCII letters are left untouched);
/// * whitespace characters become `-`;
/// * everything else is dropped.
pub fn normalize_id(content: &str) -> String {
    content
        .chars()
        .filter_map(|ch| {
            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
                Some(ch.to_ascii_lowercase())
            } else if ch.is_whitespace() {
                Some('-')
            } else {
                None
            }
        })
        .collect::<String>()
}
44 | |
45 | /// Generate an ID for use with anchors which is derived from a "normalised" |
46 | /// string. |
47 | // This function should be made private when the deprecation expires. |
48 | #[deprecated (since = "0.4.16" , note = "use unique_id_from_content instead" )] |
49 | pub fn id_from_content(content: &str) -> String { |
50 | let mut content: String = content.to_string(); |
51 | |
52 | // Skip any tags or html-encoded stuff |
53 | static HTML: Lazy<Regex> = Lazy::new(|| Regex::new(re:r"(<.*?>)" ).unwrap()); |
54 | content = HTML.replace_all(&content, rep:"" ).into(); |
55 | const REPL_SUB: &[&str] = &["<" , ">" , "&" , "'" , """ ]; |
56 | for sub: &&str in REPL_SUB { |
57 | content = content.replace(from:sub, to:"" ); |
58 | } |
59 | |
60 | // Remove spaces and hashes indicating a header |
61 | let trimmed: &str = content.trim().trim_start_matches('#' ).trim(); |
62 | normalize_id(content:trimmed) |
63 | } |
64 | |
65 | /// Generate an ID for use with anchors which is derived from a "normalised" |
66 | /// string. |
67 | /// |
68 | /// Each ID returned will be unique, if the same `id_counter` is provided on |
69 | /// each call. |
70 | pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String { |
71 | let id: String = { |
72 | #[allow (deprecated)] |
73 | id_from_content(content) |
74 | }; |
75 | |
76 | // If we have headers with the same normalized id, append an incrementing counter |
77 | let id_count: &mut usize = id_counter.entry(id.clone()).or_insert(default:0); |
78 | let unique_id: String = match *id_count { |
79 | 0 => id, |
80 | id_count: usize => format!(" {}- {}" , id, id_count), |
81 | }; |
82 | *id_count += 1; |
83 | unique_id |
84 | } |
85 | |
86 | /// Fix links to the correct location. |
87 | /// |
88 | /// This adjusts links, such as turning `.md` extensions to `.html`. |
89 | /// |
90 | /// `path` is the path to the page being rendered relative to the root of the |
91 | /// book. This is used for the `print.html` page so that links on the print |
92 | /// page go to the original location. Normal page rendering sets `path` to |
93 | /// None. Ideally, print page links would link to anchors on the print page, |
94 | /// but that is very difficult. |
95 | fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { |
96 | static SCHEME_LINK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:" ).unwrap()); |
97 | static MD_LINK: Lazy<Regex> = |
98 | Lazy::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?" ).unwrap()); |
99 | |
100 | fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { |
101 | if dest.starts_with('#' ) { |
102 | // Fragment-only link. |
103 | if let Some(path) = path { |
104 | let mut base = path.display().to_string(); |
105 | if base.ends_with(".md" ) { |
106 | base.replace_range(base.len() - 3.., ".html" ); |
107 | } |
108 | return format!(" {}{}" , base, dest).into(); |
109 | } else { |
110 | return dest; |
111 | } |
112 | } |
113 | // Don't modify links with schemes like `https`. |
114 | if !SCHEME_LINK.is_match(&dest) { |
115 | // This is a relative link, adjust it as necessary. |
116 | let mut fixed_link = String::new(); |
117 | if let Some(path) = path { |
118 | let base = path |
119 | .parent() |
120 | .expect("path can't be empty" ) |
121 | .to_str() |
122 | .expect("utf-8 paths only" ); |
123 | if !base.is_empty() { |
124 | write!(fixed_link, " {}/" , base).unwrap(); |
125 | } |
126 | } |
127 | |
128 | if let Some(caps) = MD_LINK.captures(&dest) { |
129 | fixed_link.push_str(&caps["link" ]); |
130 | fixed_link.push_str(".html" ); |
131 | if let Some(anchor) = caps.name("anchor" ) { |
132 | fixed_link.push_str(anchor.as_str()); |
133 | } |
134 | } else { |
135 | fixed_link.push_str(&dest); |
136 | }; |
137 | return CowStr::from(fixed_link); |
138 | } |
139 | dest |
140 | } |
141 | |
142 | fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { |
143 | // This is a terrible hack, but should be reasonably reliable. Nobody |
144 | // should ever parse a tag with a regex. However, there isn't anything |
145 | // in Rust that I know of that is suitable for handling partial html |
146 | // fragments like those generated by pulldown_cmark. |
147 | // |
148 | // There are dozens of HTML tags/attributes that contain paths, so |
149 | // feel free to add more tags if desired; these are the only ones I |
150 | // care about right now. |
151 | static HTML_LINK: Lazy<Regex> = |
152 | Lazy::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""# ).unwrap()); |
153 | |
154 | HTML_LINK |
155 | .replace_all(&html, |caps: ®ex::Captures<'_>| { |
156 | let fixed = fix(caps[2].into(), path); |
157 | format!(" {}{}\"" , &caps[1], fixed) |
158 | }) |
159 | .into_owned() |
160 | .into() |
161 | } |
162 | |
163 | match event { |
164 | Event::Start(Tag::Link(link_type, dest, title)) => { |
165 | Event::Start(Tag::Link(link_type, fix(dest, path), title)) |
166 | } |
167 | Event::Start(Tag::Image(link_type, dest, title)) => { |
168 | Event::Start(Tag::Image(link_type, fix(dest, path), title)) |
169 | } |
170 | Event::Html(html) => Event::Html(fix_html(html, path)), |
171 | _ => event, |
172 | } |
173 | } |
174 | |
175 | /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. |
176 | pub fn render_markdown(text: &str, curly_quotes: bool) -> String { |
177 | render_markdown_with_path(text, curly_quotes, path:None) |
178 | } |
179 | |
180 | pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> { |
181 | let mut opts: Options = Options::empty(); |
182 | opts.insert(Options::ENABLE_TABLES); |
183 | opts.insert(Options::ENABLE_FOOTNOTES); |
184 | opts.insert(Options::ENABLE_STRIKETHROUGH); |
185 | opts.insert(Options::ENABLE_TASKLISTS); |
186 | opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); |
187 | if curly_quotes { |
188 | opts.insert(Options::ENABLE_SMART_PUNCTUATION); |
189 | } |
190 | Parser::new_ext(text, options:opts) |
191 | } |
192 | |
193 | pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String { |
194 | let mut s: String = String::with_capacity(text.len() * 3 / 2); |
195 | let p: Parser<'_, '_> = new_cmark_parser(text, curly_quotes); |
196 | let events: impl Iterator- >
= pimpl Iterator- >
|
197 | .map(clean_codeblock_headers) |
198 | .map(|event: Event<'_>| adjust_links(event, path)) |
199 | .flat_map(|event: Event<'_>| { |
200 | let (a: Option>, b: Option>) = wrap_tables(event); |
201 | a.into_iter().chain(b) |
202 | }); |
203 | |
204 | html::push_html(&mut s, iter:events); |
205 | s |
206 | } |
207 | |
208 | /// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to. |
209 | fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) { |
210 | match event { |
211 | Event::Start(Tag::Table(_)) => ( |
212 | Some(Event::Html(r#"<div class="table-wrapper">"# .into())), |
213 | Some(event), |
214 | ), |
215 | Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"# .into()))), |
216 | _ => (Some(event), None), |
217 | } |
218 | } |
219 | |
220 | fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { |
221 | match event { |
222 | Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info: &CowStr<'_>))) => { |
223 | let info: String = infoimpl Iterator |
224 | .chars() |
225 | .map(|x: char| match x { |
226 | ' ' | ' \t' => ',' , |
227 | _ => x, |
228 | }) |
229 | .filter(|ch: &char| !ch.is_whitespace()) |
230 | .collect(); |
231 | |
232 | Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info)))) |
233 | } |
234 | _ => event, |
235 | } |
236 | } |
237 | |
238 | /// Prints a "backtrace" of some `Error`. |
239 | pub fn log_backtrace(e: &Error) { |
240 | error!("Error: {}" , e); |
241 | |
242 | for cause: &dyn Error in e.chain().skip(1) { |
243 | error!(" \tCaused By: {}" , cause); |
244 | } |
245 | } |
246 | |
/// Escapes angle brackets for inclusion in HTML.
///
/// Every `<` becomes `&lt;` and every `>` becomes `&gt;`. All other
/// characters, including `&`, are copied through unchanged.
pub(crate) fn bracket_escape(mut s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let needs_escape: &[char] = &['<', '>'];
    while let Some(next) = s.find(needs_escape) {
        // Copy the run of plain text before the bracket.
        escaped.push_str(&s[..next]);
        match s.as_bytes()[next] {
            b'<' => escaped.push_str("&lt;"),
            b'>' => escaped.push_str("&gt;"),
            _ => unreachable!(),
        }
        // Both brackets are one byte long, so `next + 1` is a char boundary.
        s = &s[next + 1..];
    }
    escaped.push_str(s);
    escaped
}
262 | |
263 | #[cfg (test)] |
264 | mod tests { |
265 | use super::bracket_escape; |
266 | |
267 | mod render_markdown { |
268 | use super::super::render_markdown; |
269 | |
270 | #[test ] |
271 | fn preserves_external_links() { |
272 | assert_eq!( |
273 | render_markdown("[example](https://www.rust-lang.org/)" , false), |
274 | "<p><a href= \"https://www.rust-lang.org/ \">example</a></p> \n" |
275 | ); |
276 | } |
277 | |
278 | #[test ] |
279 | fn it_can_adjust_markdown_links() { |
280 | assert_eq!( |
281 | render_markdown("[example](example.md)" , false), |
282 | "<p><a href= \"example.html \">example</a></p> \n" |
283 | ); |
284 | assert_eq!( |
285 | render_markdown("[example_anchor](example.md#anchor)" , false), |
286 | "<p><a href= \"example.html#anchor \">example_anchor</a></p> \n" |
287 | ); |
288 | |
289 | // this anchor contains 'md' inside of it |
290 | assert_eq!( |
291 | render_markdown("[phantom data](foo.html#phantomdata)" , false), |
292 | "<p><a href= \"foo.html#phantomdata \">phantom data</a></p> \n" |
293 | ); |
294 | } |
295 | |
296 | #[test ] |
297 | fn it_can_wrap_tables() { |
298 | let src = r#" |
299 | | Original | Punycode | Punycode + Encoding | |
300 | |-----------------|-----------------|---------------------| |
301 | | føø | f-5gaa | f_5gaa | |
302 | "# ; |
303 | let out = r#" |
304 | <div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody> |
305 | <tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr> |
306 | </tbody></table> |
307 | </div> |
308 | "# .trim(); |
309 | assert_eq!(render_markdown(src, false), out); |
310 | } |
311 | |
312 | #[test ] |
313 | fn it_can_keep_quotes_straight() { |
314 | assert_eq!(render_markdown("'one'" , false), "<p>'one'</p> \n" ); |
315 | } |
316 | |
317 | #[test ] |
318 | fn it_can_make_quotes_curly_except_when_they_are_in_code() { |
319 | let input = r#" |
320 | 'one' |
321 | ``` |
322 | 'two' |
323 | ``` |
324 | `'three'` 'four'"# ; |
325 | let expected = r#"<p>‘one’</p> |
326 | <pre><code>'two' |
327 | </code></pre> |
328 | <p><code>'three'</code> ‘four’</p> |
329 | "# ; |
330 | assert_eq!(render_markdown(input, true), expected); |
331 | } |
332 | |
333 | #[test ] |
334 | fn whitespace_outside_of_codeblock_header_is_preserved() { |
335 | let input = r#" |
336 | some text with spaces |
337 | ```rust |
338 | fn main() { |
339 | // code inside is unchanged |
340 | } |
341 | ``` |
342 | more text with spaces |
343 | "# ; |
344 | |
345 | let expected = r#"<p>some text with spaces</p> |
346 | <pre><code class="language-rust">fn main() { |
347 | // code inside is unchanged |
348 | } |
349 | </code></pre> |
350 | <p>more text with spaces</p> |
351 | "# ; |
352 | assert_eq!(render_markdown(input, false), expected); |
353 | assert_eq!(render_markdown(input, true), expected); |
354 | } |
355 | |
356 | #[test ] |
357 | fn rust_code_block_properties_are_passed_as_space_delimited_class() { |
358 | let input = r#" |
359 | ```rust,no_run,should_panic,property_3 |
360 | ``` |
361 | "# ; |
362 | |
363 | let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre> |
364 | "# ; |
365 | assert_eq!(render_markdown(input, false), expected); |
366 | assert_eq!(render_markdown(input, true), expected); |
367 | } |
368 | |
369 | #[test ] |
370 | fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() { |
371 | let input = r#" |
372 | ```rust, no_run,,,should_panic , ,property_3 |
373 | ``` |
374 | "# ; |
375 | |
376 | let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre> |
377 | "# ; |
378 | assert_eq!(render_markdown(input, false), expected); |
379 | assert_eq!(render_markdown(input, true), expected); |
380 | } |
381 | |
382 | #[test ] |
383 | fn rust_code_block_without_properties_has_proper_html_class() { |
384 | let input = r#" |
385 | ```rust |
386 | ``` |
387 | "# ; |
388 | |
389 | let expected = r#"<pre><code class="language-rust"></code></pre> |
390 | "# ; |
391 | assert_eq!(render_markdown(input, false), expected); |
392 | assert_eq!(render_markdown(input, true), expected); |
393 | |
394 | let input = r#" |
395 | ```rust |
396 | ``` |
397 | "# ; |
398 | assert_eq!(render_markdown(input, false), expected); |
399 | assert_eq!(render_markdown(input, true), expected); |
400 | } |
401 | } |
402 | |
403 | #[allow (deprecated)] |
404 | mod id_from_content { |
405 | use super::super::id_from_content; |
406 | |
407 | #[test ] |
408 | fn it_generates_anchors() { |
409 | assert_eq!( |
410 | id_from_content("## Method-call expressions" ), |
411 | "method-call-expressions" |
412 | ); |
413 | assert_eq!(id_from_content("## **Bold** title" ), "bold-title" ); |
414 | assert_eq!(id_from_content("## `Code` title" ), "code-title" ); |
415 | assert_eq!( |
416 | id_from_content("## title <span dir=rtl>foo</span>" ), |
417 | "title-foo" |
418 | ); |
419 | } |
420 | |
421 | #[test ] |
422 | fn it_generates_anchors_from_non_ascii_initial() { |
423 | assert_eq!( |
424 | id_from_content("## `--passes`: add more rustdoc passes" ), |
425 | "--passes-add-more-rustdoc-passes" |
426 | ); |
427 | assert_eq!( |
428 | id_from_content("## 中文標題 CJK title" ), |
429 | "中文標題-cjk-title" |
430 | ); |
431 | assert_eq!(id_from_content("## Über" ), "Über" ); |
432 | } |
433 | } |
434 | |
435 | mod html_munging { |
436 | use super::super::{normalize_id, unique_id_from_content}; |
437 | |
438 | #[test ] |
439 | fn it_normalizes_ids() { |
440 | assert_eq!( |
441 | normalize_id("`--passes`: add more rustdoc passes" ), |
442 | "--passes-add-more-rustdoc-passes" |
443 | ); |
444 | assert_eq!( |
445 | normalize_id("Method-call 🐙 expressions \u{1f47c}" ), |
446 | "method-call--expressions-" |
447 | ); |
448 | assert_eq!(normalize_id("_-_12345" ), "_-_12345" ); |
449 | assert_eq!(normalize_id("12345" ), "12345" ); |
450 | assert_eq!(normalize_id("中文" ), "中文" ); |
451 | assert_eq!(normalize_id("にほんご" ), "にほんご" ); |
452 | assert_eq!(normalize_id("한국어" ), "한국어" ); |
453 | assert_eq!(normalize_id("" ), "" ); |
454 | } |
455 | |
456 | #[test ] |
457 | fn it_generates_unique_ids_from_content() { |
458 | // Same id if not given shared state |
459 | assert_eq!( |
460 | unique_id_from_content("## 中文標題 CJK title" , &mut Default::default()), |
461 | "中文標題-cjk-title" |
462 | ); |
463 | assert_eq!( |
464 | unique_id_from_content("## 中文標題 CJK title" , &mut Default::default()), |
465 | "中文標題-cjk-title" |
466 | ); |
467 | |
468 | // Different id if given shared state |
469 | let mut id_counter = Default::default(); |
470 | assert_eq!(unique_id_from_content("## Über" , &mut id_counter), "Über" ); |
471 | assert_eq!( |
472 | unique_id_from_content("## 中文標題 CJK title" , &mut id_counter), |
473 | "中文標題-cjk-title" |
474 | ); |
475 | assert_eq!(unique_id_from_content("## Über" , &mut id_counter), "Über-1" ); |
476 | assert_eq!(unique_id_from_content("## Über" , &mut id_counter), "Über-2" ); |
477 | } |
478 | } |
479 | |
480 | #[test ] |
481 | fn escaped_brackets() { |
482 | assert_eq!(bracket_escape("" ), "" ); |
483 | assert_eq!(bracket_escape("<" ), "<" ); |
484 | assert_eq!(bracket_escape(">" ), ">" ); |
485 | assert_eq!(bracket_escape("<>" ), "<>" ); |
486 | assert_eq!(bracket_escape("<test>" ), "<test>" ); |
487 | assert_eq!(bracket_escape("a<test>b" ), "a<test>b" ); |
488 | } |
489 | } |
490 | |