1 | //! Markdown formatting for rustdoc. |
2 | //! |
3 | //! This module implements markdown formatting through the pulldown-cmark library. |
4 | //! |
5 | //! ``` |
6 | //! #![feature(rustc_private)] |
7 | //! |
8 | //! extern crate rustc_span; |
9 | //! |
10 | //! use rustc_span::edition::Edition; |
11 | //! use rustdoc::html::markdown::{HeadingOffset, IdMap, Markdown, ErrorCodes}; |
12 | //! |
13 | //! let s = "My *markdown* _text_" ; |
14 | //! let mut id_map = IdMap::new(); |
15 | //! let md = Markdown { |
16 | //! content: s, |
17 | //! links: &[], |
18 | //! ids: &mut id_map, |
19 | //! error_codes: ErrorCodes::Yes, |
20 | //! edition: Edition::Edition2015, |
21 | //! playground: &None, |
22 | //! heading_offset: HeadingOffset::H2, |
23 | //! }; |
24 | //! let html = md.into_string(); |
25 | //! // ... something using html |
26 | //! ``` |
27 | |
28 | use std::borrow::Cow; |
29 | use std::collections::VecDeque; |
30 | use std::fmt::Write; |
31 | use std::iter::Peekable; |
32 | use std::ops::{ControlFlow, Range}; |
33 | use std::path::PathBuf; |
34 | use std::str::{self, CharIndices}; |
35 | use std::sync::atomic::AtomicUsize; |
36 | use std::sync::{Arc, Weak}; |
37 | |
38 | use pulldown_cmark::{ |
39 | BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html, |
40 | }; |
41 | use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; |
42 | use rustc_errors::{Diag, DiagMessage}; |
43 | use rustc_hir::def_id::LocalDefId; |
44 | use rustc_middle::ty::TyCtxt; |
45 | pub(crate) use rustc_resolve::rustdoc::main_body_opts; |
46 | use rustc_resolve::rustdoc::may_be_doc_link; |
47 | use rustc_span::edition::Edition; |
48 | use rustc_span::{Span, Symbol}; |
49 | use tracing::{debug, trace}; |
50 | |
51 | use crate::clean::RenderedLink; |
52 | use crate::doctest; |
53 | use crate::doctest::GlobalTestOptions; |
54 | use crate::html::escape::{Escape, EscapeBodyText}; |
55 | use crate::html::highlight; |
56 | use crate::html::length_limit::HtmlWithLimit; |
57 | use crate::html::render::small_url_encode; |
58 | use crate::html::toc::{Toc, TocBuilder}; |
59 | |
60 | mod footnotes; |
61 | #[cfg (test)] |
62 | mod tests; |
63 | |
/// Upper bound applied to a heading's level after the heading offset has been
/// added, so rendered headings never go deeper than `<h6>`.
const MAX_HEADER_LEVEL: u32 = 6;
65 | |
66 | /// Options for rendering Markdown in summaries (e.g., in search results). |
67 | pub(crate) fn summary_opts() -> Options { |
68 | Options::ENABLE_TABLES |
69 | | Options::ENABLE_FOOTNOTES |
70 | | Options::ENABLE_STRIKETHROUGH |
71 | | Options::ENABLE_TASKLISTS |
72 | | Options::ENABLE_SMART_PUNCTUATION |
73 | } |
74 | |
/// Amount by which Markdown heading levels are shifted when rendered.
///
/// The discriminant is the number of levels added to a heading's own level:
/// with `H1` a `# heading` stays an `<h1>`, with `H2` it becomes an `<h2>`,
/// and so on.
#[derive(Clone, Copy, Debug)]
pub enum HeadingOffset {
    H1 = 0,
    H2 = 1,
    H3 = 2,
    H4 = 3,
    H5 = 4,
    H6 = 5,
}
84 | |
/// When `into_string` is called, this struct will emit the HTML corresponding to
/// the rendered version of the contained markdown string.
pub struct Markdown<'a> {
    /// The Markdown source text to render.
    pub content: &'a str,
    /// A list of link replacements.
    pub links: &'a [RenderedLink],
    /// The current list of used header IDs.
    pub ids: &'a mut IdMap,
    /// Whether to allow the use of explicit error codes in doctest lang strings.
    pub error_codes: ErrorCodes,
    /// Default edition to use when parsing doctests (to add a `fn main`).
    pub edition: Edition,
    /// Playground configuration used for the "Run" buttons on code blocks, if any.
    pub playground: &'a Option<Playground>,
    /// Offset at which we render headings.
    /// E.g. if `heading_offset: HeadingOffset::H2`, then `# something` renders an `<h2>`.
    pub heading_offset: HeadingOffset,
}
/// A struct like `Markdown` that renders the markdown with a table of contents.
///
/// It carries the same fields as `Markdown` except for the heading offset.
pub(crate) struct MarkdownWithToc<'a> {
    /// The Markdown source text to render.
    pub(crate) content: &'a str,
    /// A list of link replacements.
    pub(crate) links: &'a [RenderedLink],
    /// The current list of used header IDs.
    pub(crate) ids: &'a mut IdMap,
    /// Whether to allow the use of explicit error codes in doctest lang strings.
    pub(crate) error_codes: ErrorCodes,
    /// Default edition to use when parsing doctests.
    pub(crate) edition: Edition,
    /// Playground configuration used for the "Run" buttons on code blocks, if any.
    pub(crate) playground: &'a Option<Playground>,
}
/// A tuple struct like `Markdown` that renders the markdown escaping HTML tags
/// and includes no paragraph tags.
///
/// Field `0` is a string slice (presumably the Markdown source, as in
/// `Markdown::content` — confirm against the rendering impl) and field `1`
/// is the map of already-used IDs.
pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
/// A tuple struct like `Markdown` that renders only the first paragraph.
///
/// Field `0` is a string slice (presumably the Markdown source — confirm
/// against the rendering impl) and field `1` is the list of link replacements.
pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
116 | |
/// Two-state flag saying whether explicit error codes are allowed in doctest
/// lang strings.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Converts a boolean into the flag: `true` is `Yes`, `false` is `No`.
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Converts the flag back into a boolean: `Yes` is `true`, `No` is `false`.
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
138 | |
/// Controls whether a line will be hidden or shown in HTML output.
///
/// All lines are used in documentation tests.
pub(crate) enum Line<'a> {
    /// A line that is left out of the rendered HTML but kept in the test code.
    Hidden(&'a str),
    /// A line that appears both in the rendered HTML and in the test code.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Returns the text to render in HTML, or `None` when the line is hidden.
    fn for_html(self) -> Option<Cow<'a, str>> {
        match self {
            Line::Shown(l) => Some(l),
            Line::Hidden(_) => None,
        }
    }

    /// Returns the text to use as code; hidden lines are included.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Shown(l) => l,
            Line::Hidden(l) => Cow::Borrowed(l),
        }
    }
}
162 | |
163 | /// This function is used to handle the "hidden lines" (ie starting with `#`) in |
164 | /// doctests. It also transforms `##` back into `#`. |
165 | // FIXME: There is a minor inconsistency here. For lines that start with ##, we |
166 | // have no easy way of removing a potential single space after the hashes, which |
167 | // is done in the single # case. This inconsistency seems okay, if non-ideal. In |
168 | // order to fix it we'd have to iterate to find the first non-# character, and |
169 | // then reallocate to remove it; which would make us return a String. |
170 | pub(crate) fn map_line(s: &str) -> Line<'_> { |
171 | let trimmed: &str = s.trim(); |
172 | if trimmed.starts_with("##" ) { |
173 | Line::Shown(Cow::Owned(s.replacen(pat:"##" , to:"#" , count:1))) |
174 | } else if let Some(stripped: &str) = trimmed.strip_prefix("# " ) { |
175 | // # text |
176 | Line::Hidden(stripped) |
177 | } else if trimmed == "#" { |
178 | // We cannot handle '#text' because it could be #[attr]. |
179 | Line::Hidden("" ) |
180 | } else { |
181 | Line::Shown(Cow::Borrowed(s)) |
182 | } |
183 | } |
184 | |
/// Maps one character of a title to the character used in its id, or `None`
/// when the character is dropped.
///
/// Applied character by character: "Hello, world!" -> "hello-world"
///
/// Alphanumerics, `-` and `_` are kept (ASCII letters are lowercased), ASCII
/// whitespace becomes `-`, and everything else is discarded.
fn slugify(c: char) -> Option<char> {
    match c {
        '-' | '_' => Some(c),
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        _ if c.is_alphanumeric() => Some(c.to_ascii_lowercase()),
        _ if c.is_ascii() && c.is_whitespace() => Some('-'),
        _ => None,
    }
}
197 | |
/// Information about the playground: an optional crate name and the URL.
#[derive(Clone, Debug)]
pub struct Playground {
    /// Optional crate name handed to the doctest builder when generating the
    /// code sent to the playground.
    pub crate_name: Option<Symbol>,
    /// Base URL of the playground; no "Run" button is emitted when it is empty.
    pub url: String,
}
203 | |
/// Adds syntax highlighting and playground Run buttons to Rust code blocks.
///
/// This is an iterator adaptor over Markdown events.
struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
    // The wrapped event stream.
    inner: I,
    // Passed to the lang-string parser for every fenced block.
    check_error_codes: ErrorCodes,
    // Edition used when a block does not specify one itself.
    edition: Edition,
    // Information about the playground if a URL has been specified, containing an
    // optional crate name and the URL.
    playground: &'p Option<Playground>,
}
213 | |
214 | impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> { |
215 | fn new( |
216 | iter: I, |
217 | error_codes: ErrorCodes, |
218 | edition: Edition, |
219 | playground: &'p Option<Playground>, |
220 | ) -> Self { |
221 | CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground } |
222 | } |
223 | } |
224 | |
225 | impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> { |
226 | type Item = Event<'a>; |
227 | |
228 | fn next(&mut self) -> Option<Self::Item> { |
229 | let event = self.inner.next(); |
230 | let Some(Event::Start(Tag::CodeBlock(kind))) = event else { |
231 | return event; |
232 | }; |
233 | |
234 | let mut original_text = String::new(); |
235 | for event in &mut self.inner { |
236 | match event { |
237 | Event::End(TagEnd::CodeBlock) => break, |
238 | Event::Text(ref s) => { |
239 | original_text.push_str(s); |
240 | } |
241 | _ => {} |
242 | } |
243 | } |
244 | |
245 | let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } = |
246 | match kind { |
247 | CodeBlockKind::Fenced(ref lang) => { |
248 | let parse_result = |
249 | LangString::parse_without_check(lang, self.check_error_codes, false); |
250 | if !parse_result.rust { |
251 | let added_classes = parse_result.added_classes; |
252 | let lang_string = if let Some(lang) = parse_result.unknown.first() { |
253 | format!("language- {}" , lang) |
254 | } else { |
255 | String::new() |
256 | }; |
257 | let whitespace = if added_classes.is_empty() { "" } else { " " }; |
258 | return Some(Event::Html( |
259 | format!( |
260 | "<div class= \"example-wrap \">\ |
261 | <pre class= \"{lang_string}{whitespace}{added_classes}\">\ |
262 | <code> {text}</code>\ |
263 | </pre>\ |
264 | </div>" , |
265 | added_classes = added_classes.join(" " ), |
266 | text = Escape( |
267 | original_text.strip_suffix(' \n' ).unwrap_or(&original_text) |
268 | ), |
269 | ) |
270 | .into(), |
271 | )); |
272 | } |
273 | parse_result |
274 | } |
275 | CodeBlockKind::Indented => Default::default(), |
276 | }; |
277 | |
278 | let lines = original_text.lines().filter_map(|l| map_line(l).for_html()); |
279 | let text = lines.intersperse(" \n" .into()).collect::<String>(); |
280 | |
281 | let explicit_edition = edition.is_some(); |
282 | let edition = edition.unwrap_or(self.edition); |
283 | |
284 | let playground_button = self.playground.as_ref().and_then(|playground| { |
285 | let krate = &playground.crate_name; |
286 | let url = &playground.url; |
287 | if url.is_empty() { |
288 | return None; |
289 | } |
290 | let test = original_text |
291 | .lines() |
292 | .map(|l| map_line(l).for_code()) |
293 | .intersperse(" \n" .into()) |
294 | .collect::<String>(); |
295 | let krate = krate.as_ref().map(|s| s.as_str()); |
296 | |
297 | // FIXME: separate out the code to make a code block into runnable code |
298 | // from the complicated doctest logic |
299 | let opts = GlobalTestOptions { |
300 | crate_name: krate.map(String::from).unwrap_or_default(), |
301 | no_crate_inject: false, |
302 | insert_indent_space: true, |
303 | attrs: vec![], |
304 | args_file: PathBuf::new(), |
305 | }; |
306 | let doctest = doctest::DocTestBuilder::new(&test, krate, edition, false, None, None); |
307 | let (test, _) = doctest.generate_unique_doctest(&test, false, &opts, krate); |
308 | let channel = if test.contains("#![feature(" ) { "&version=nightly" } else { "" }; |
309 | |
310 | let test_escaped = small_url_encode(test); |
311 | Some(format!( |
312 | "<a class= \"test-arrow \" \ |
313 | target= \"_blank \" \ |
314 | title= \"Run code \" \ |
315 | href= \"{url}?code= {test_escaped}{channel}&edition= {edition}\"></a>" , |
316 | )) |
317 | }); |
318 | |
319 | let tooltip = if ignore != Ignore::None { |
320 | highlight::Tooltip::Ignore |
321 | } else if compile_fail { |
322 | highlight::Tooltip::CompileFail |
323 | } else if should_panic { |
324 | highlight::Tooltip::ShouldPanic |
325 | } else if explicit_edition { |
326 | highlight::Tooltip::Edition(edition) |
327 | } else { |
328 | highlight::Tooltip::None |
329 | }; |
330 | |
331 | // insert newline to clearly separate it from the |
332 | // previous block so we can shorten the html output |
333 | let mut s = String::new(); |
334 | s.push(' \n' ); |
335 | |
336 | highlight::render_example_with_highlighting( |
337 | &text, |
338 | &mut s, |
339 | tooltip, |
340 | playground_button.as_deref(), |
341 | &added_classes, |
342 | ); |
343 | Some(Event::Html(s.into())) |
344 | } |
345 | } |
346 | |
/// State shared by `LinkReplacer` and `SpannedLinkReplacer`: replaces
/// intra-doc links and removes disambiguators from shortcut links (`[fn@f]`).
struct LinkReplacerInner<'a> {
    /// The link replacements to look up.
    links: &'a [RenderedLink],
    /// The matching replacement while we are between the start and end tags
    /// of a shortcut link, `None` otherwise.
    shortcut_link: Option<&'a RenderedLink>,
}
352 | |
/// Iterator adaptor that runs every event of `iter` through
/// `LinkReplacerInner::handle_event` before yielding it.
struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    iter: I,
    /// The link-replacement state.
    inner: LinkReplacerInner<'a>,
}
357 | |
358 | impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> { |
359 | fn new(iter: I, links: &'a [RenderedLink]) -> Self { |
360 | LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } } |
361 | } |
362 | } |
363 | |
// FIXME: Once we have specialized trait impl (for `Iterator` impl on `LinkReplacer`),
// we can remove this type and move back `LinkReplacerInner` fields into `LinkReplacer`.
/// Same as `LinkReplacer`, but for events that carry their source range.
struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
    /// The wrapped stream of `(event, range)` pairs.
    iter: I,
    /// The link-replacement state.
    inner: LinkReplacerInner<'a>,
}
370 | |
371 | impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> { |
372 | fn new(iter: I, links: &'a [RenderedLink]) -> Self { |
373 | SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } } |
374 | } |
375 | } |
376 | |
377 | impl<'a> LinkReplacerInner<'a> { |
378 | fn handle_event(&mut self, event: &mut Event<'a>) { |
379 | // Replace intra-doc links and remove disambiguators from shortcut links (`[fn@f]`). |
380 | match event { |
381 | // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]` |
382 | // Remove any disambiguator. |
383 | Event::Start(Tag::Link { |
384 | // [fn@f] or [fn@f][] |
385 | link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, |
386 | dest_url, |
387 | title, |
388 | .. |
389 | }) => { |
390 | debug!("saw start of shortcut link to {dest_url} with title {title}" ); |
391 | // If this is a shortcut link, it was resolved by the broken_link_callback. |
392 | // So the URL will already be updated properly. |
393 | let link = self.links.iter().find(|&link| *link.href == **dest_url); |
394 | // Since this is an external iterator, we can't replace the inner text just yet. |
395 | // Store that we saw a link so we know to replace it later. |
396 | if let Some(link) = link { |
397 | trace!("it matched" ); |
398 | assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested" ); |
399 | self.shortcut_link = Some(link); |
400 | if title.is_empty() && !link.tooltip.is_empty() { |
401 | *title = CowStr::Borrowed(link.tooltip.as_ref()); |
402 | } |
403 | } |
404 | } |
405 | // Now that we're done with the shortcut link, don't replace any more text. |
406 | Event::End(TagEnd::Link) if self.shortcut_link.is_some() => { |
407 | debug!("saw end of shortcut link" ); |
408 | self.shortcut_link = None; |
409 | } |
410 | // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link. |
411 | // [`fn@f`] |
412 | Event::Code(text) => { |
413 | trace!("saw code {text}" ); |
414 | if let Some(link) = self.shortcut_link { |
415 | // NOTE: this only replaces if the code block is the *entire* text. |
416 | // If only part of the link has code highlighting, the disambiguator will not be removed. |
417 | // e.g. [fn@`f`] |
418 | // This is a limitation from `collect_intra_doc_links`: it passes a full link, |
419 | // and does not distinguish at all between code blocks. |
420 | // So we could never be sure we weren't replacing too much: |
421 | // [fn@my_`f`unc] is treated the same as [my_func()] in that pass. |
422 | // |
423 | // NOTE: .get(1..len() - 1) is to strip the backticks |
424 | if let Some(link) = self.links.iter().find(|l| { |
425 | l.href == link.href |
426 | && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1) |
427 | }) { |
428 | debug!("replacing {text} with {new_text}" , new_text = link.new_text); |
429 | *text = CowStr::Borrowed(&link.new_text); |
430 | } |
431 | } |
432 | } |
433 | // Replace plain text in links, but only in the middle of a shortcut link. |
434 | // [fn@f] |
435 | Event::Text(text) => { |
436 | trace!("saw text {text}" ); |
437 | if let Some(link) = self.shortcut_link { |
438 | // NOTE: same limitations as `Event::Code` |
439 | if let Some(link) = self |
440 | .links |
441 | .iter() |
442 | .find(|l| l.href == link.href && **text == *l.original_text) |
443 | { |
444 | debug!("replacing {text} with {new_text}" , new_text = link.new_text); |
445 | *text = CowStr::Borrowed(&link.new_text); |
446 | } |
447 | } |
448 | } |
449 | // If this is a link, but not a shortcut link, |
450 | // replace the URL, since the broken_link_callback was not called. |
451 | Event::Start(Tag::Link { dest_url, title, .. }) => { |
452 | if let Some(link) = |
453 | self.links.iter().find(|&link| *link.original_text == **dest_url) |
454 | { |
455 | *dest_url = CowStr::Borrowed(link.href.as_ref()); |
456 | if title.is_empty() && !link.tooltip.is_empty() { |
457 | *title = CowStr::Borrowed(link.tooltip.as_ref()); |
458 | } |
459 | } |
460 | } |
461 | // Anything else couldn't have been a valid Rust path, so no need to replace the text. |
462 | _ => {} |
463 | } |
464 | } |
465 | } |
466 | |
467 | impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> { |
468 | type Item = Event<'a>; |
469 | |
470 | fn next(&mut self) -> Option<Self::Item> { |
471 | let mut event: Option<{unknown}> = self.iter.next(); |
472 | if let Some(ref mut event: &mut {unknown}) = event { |
473 | self.inner.handle_event(event); |
474 | } |
475 | // Yield the modified event |
476 | event |
477 | } |
478 | } |
479 | |
480 | impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> { |
481 | type Item = SpannedEvent<'a>; |
482 | |
483 | fn next(&mut self) -> Option<Self::Item> { |
484 | let (mut event, range: Range) = self.iter.next()?; |
485 | self.inner.handle_event(&mut event); |
486 | // Yield the modified event |
487 | Some((event, range)) |
488 | } |
489 | } |
490 | |
/// Wrap HTML tables into `<div>` to prevent having the doc blocks width being too big.
struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    inner: I,
    /// Events queued to be yielded before pulling from `inner` again.
    stored_events: VecDeque<Event<'a>>,
}
496 | |
497 | impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> { |
498 | fn new(iter: I) -> Self { |
499 | Self { inner: iter, stored_events: VecDeque::new() } |
500 | } |
501 | } |
502 | |
503 | impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> { |
504 | type Item = Event<'a>; |
505 | |
506 | fn next(&mut self) -> Option<Self::Item> { |
507 | if let Some(first) = self.stored_events.pop_front() { |
508 | return Some(first); |
509 | } |
510 | |
511 | let event = self.inner.next()?; |
512 | |
513 | Some(match event { |
514 | Event::Start(Tag::Table(t)) => { |
515 | self.stored_events.push_back(Event::Start(Tag::Table(t))); |
516 | Event::Html(CowStr::Borrowed("<div>" )) |
517 | } |
518 | Event::End(TagEnd::Table) => { |
519 | self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>" ))); |
520 | Event::End(TagEnd::Table) |
521 | } |
522 | e => e, |
523 | }) |
524 | } |
525 | } |
526 | |
/// A Markdown event paired with a `Range<usize>`, as produced by the parser's
/// offset iterator.
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
528 | |
/// Make headings links with anchor IDs and build up TOC.
struct HeadingLinks<'a, 'b, 'ids, I> {
    /// The wrapped stream of spanned events.
    inner: I,
    /// Table-of-contents builder that receives every heading, if one was supplied.
    toc: Option<&'b mut TocBuilder>,
    /// Events of the heading being processed, waiting to be yielded.
    buf: VecDeque<SpannedEvent<'a>>,
    /// Map used to derive the `id` attribute of each heading.
    id_map: &'ids mut IdMap,
    /// Number of levels added to every heading's own level.
    heading_offset: HeadingOffset,
}
537 | |
538 | impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> { |
539 | fn new( |
540 | iter: I, |
541 | toc: Option<&'b mut TocBuilder>, |
542 | ids: &'ids mut IdMap, |
543 | heading_offset: HeadingOffset, |
544 | ) -> Self { |
545 | HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset } |
546 | } |
547 | } |
548 | |
549 | impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> { |
550 | type Item = SpannedEvent<'a>; |
551 | |
552 | fn next(&mut self) -> Option<Self::Item> { |
553 | if let Some(e) = self.buf.pop_front() { |
554 | return Some(e); |
555 | } |
556 | |
557 | let event = self.inner.next(); |
558 | if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event { |
559 | let mut id = String::new(); |
560 | for event in &mut self.inner { |
561 | match &event.0 { |
562 | Event::End(TagEnd::Heading(_)) => break, |
563 | Event::Text(text) | Event::Code(text) => { |
564 | id.extend(text.chars().filter_map(slugify)); |
565 | self.buf.push_back(event); |
566 | } |
567 | _ => self.buf.push_back(event), |
568 | } |
569 | } |
570 | let id = self.id_map.derive(id); |
571 | |
572 | if let Some(ref mut builder) = self.toc { |
573 | let mut text_header = String::new(); |
574 | plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header); |
575 | let mut html_header = String::new(); |
576 | html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header); |
577 | let sec = builder.push(level as u32, text_header, html_header, id.clone()); |
578 | self.buf.push_front((Event::Html(format!(" {sec} " ).into()), 0..0)); |
579 | } |
580 | |
581 | let level = |
582 | std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL); |
583 | self.buf.push_back((Event::Html(format!("</h {level}>" ).into()), 0..0)); |
584 | |
585 | let start_tags = |
586 | format!("<h {level} id= \"{id}\"><a class= \"doc-anchor \" href= \"# {id}\">§</a>" ); |
587 | return Some((Event::Html(start_tags.into()), 0..0)); |
588 | } |
589 | event |
590 | } |
591 | } |
592 | |
/// Extracts just the first paragraph.
struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    inner: I,
    /// Set on the first call to `next`.
    started: bool,
    /// Number of currently open tags; iteration stops once this is back to zero.
    depth: u32,
    /// Counter bumped whenever a tag or footnote reference is skipped or replaced.
    skipped_tags: u32,
}
600 | |
601 | impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> { |
602 | fn new(iter: I) -> Self { |
603 | SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 } |
604 | } |
605 | } |
606 | |
607 | fn check_if_allowed_tag(t: &TagEnd) -> bool { |
608 | matches!( |
609 | t, |
610 | TagEnd::Paragraph |
611 | | TagEnd::Emphasis |
612 | | TagEnd::Strong |
613 | | TagEnd::Strikethrough |
614 | | TagEnd::Link |
615 | | TagEnd::BlockQuote |
616 | ) |
617 | } |
618 | |
619 | fn is_forbidden_tag(t: &TagEnd) -> bool { |
620 | matches!( |
621 | t, |
622 | TagEnd::CodeBlock |
623 | | TagEnd::Table |
624 | | TagEnd::TableHead |
625 | | TagEnd::TableRow |
626 | | TagEnd::TableCell |
627 | | TagEnd::FootnoteDefinition |
628 | ) |
629 | } |
630 | |
631 | impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> { |
632 | type Item = Event<'a>; |
633 | |
634 | fn next(&mut self) -> Option<Self::Item> { |
635 | if self.started && self.depth == 0 { |
636 | return None; |
637 | } |
638 | if !self.started { |
639 | self.started = true; |
640 | } |
641 | if let Some(event) = self.inner.next() { |
642 | let mut is_start = true; |
643 | let is_allowed_tag = match event { |
644 | Event::Start(ref c) => { |
645 | if is_forbidden_tag(&c.to_end()) { |
646 | self.skipped_tags += 1; |
647 | return None; |
648 | } |
649 | self.depth += 1; |
650 | check_if_allowed_tag(&c.to_end()) |
651 | } |
652 | Event::End(ref c) => { |
653 | if is_forbidden_tag(c) { |
654 | self.skipped_tags += 1; |
655 | return None; |
656 | } |
657 | self.depth -= 1; |
658 | is_start = false; |
659 | check_if_allowed_tag(c) |
660 | } |
661 | Event::FootnoteReference(_) => { |
662 | self.skipped_tags += 1; |
663 | false |
664 | } |
665 | _ => true, |
666 | }; |
667 | if !is_allowed_tag { |
668 | self.skipped_tags += 1; |
669 | } |
670 | return if !is_allowed_tag { |
671 | if is_start { |
672 | Some(Event::Start(Tag::Paragraph)) |
673 | } else { |
674 | Some(Event::End(TagEnd::Paragraph)) |
675 | } |
676 | } else { |
677 | Some(event) |
678 | }; |
679 | } |
680 | None |
681 | } |
682 | } |
683 | |
/// A line number in Markdown, relative to the first Markdown line.
///
/// The value is a zero-based offset from the first line of Markdown in a doc
/// comment or other source: if that first line sits on line 32 and the
/// `MdRelLine` is 3, the absolute line is 35.
pub(crate) struct MdRelLine {
    offset: usize,
}

impl MdRelLine {
    /// Wraps a zero-based line offset; see the struct docs.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Unwraps the zero-based line offset; see the struct docs.
    pub(crate) const fn offset(self) -> usize {
        let MdRelLine { offset } = self;
        offset
    }
}
705 | |
706 | pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>( |
707 | doc: &str, |
708 | tests: &mut T, |
709 | error_codes: ErrorCodes, |
710 | enable_per_target_ignores: bool, |
711 | extra_info: Option<&ExtraInfo<'_>>, |
712 | ) { |
713 | find_codes(doc, tests, error_codes, enable_per_target_ignores, extra_info, include_non_rust:false) |
714 | } |
715 | |
716 | pub(crate) fn find_codes<T: doctest::DocTestVisitor>( |
717 | doc: &str, |
718 | tests: &mut T, |
719 | error_codes: ErrorCodes, |
720 | enable_per_target_ignores: bool, |
721 | extra_info: Option<&ExtraInfo<'_>>, |
722 | include_non_rust: bool, |
723 | ) { |
724 | let mut parser = Parser::new(doc).into_offset_iter(); |
725 | let mut prev_offset = 0; |
726 | let mut nb_lines = 0; |
727 | let mut register_header = None; |
728 | while let Some((event, offset)) = parser.next() { |
729 | match event { |
730 | Event::Start(Tag::CodeBlock(kind)) => { |
731 | let block_info = match kind { |
732 | CodeBlockKind::Fenced(ref lang) => { |
733 | if lang.is_empty() { |
734 | Default::default() |
735 | } else { |
736 | LangString::parse( |
737 | lang, |
738 | error_codes, |
739 | enable_per_target_ignores, |
740 | extra_info, |
741 | ) |
742 | } |
743 | } |
744 | CodeBlockKind::Indented => Default::default(), |
745 | }; |
746 | if !include_non_rust && !block_info.rust { |
747 | continue; |
748 | } |
749 | |
750 | let mut test_s = String::new(); |
751 | |
752 | while let Some((Event::Text(s), _)) = parser.next() { |
753 | test_s.push_str(&s); |
754 | } |
755 | let text = test_s |
756 | .lines() |
757 | .map(|l| map_line(l).for_code()) |
758 | .collect::<Vec<Cow<'_, str>>>() |
759 | .join(" \n" ); |
760 | |
761 | nb_lines += doc[prev_offset..offset.start].lines().count(); |
762 | // If there are characters between the preceding line ending and |
763 | // this code block, `str::lines` will return an additional line, |
764 | // which we subtract here. |
765 | if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with(' \n' ) { |
766 | nb_lines -= 1; |
767 | } |
768 | let line = MdRelLine::new(nb_lines); |
769 | tests.visit_test(text, block_info, line); |
770 | prev_offset = offset.start; |
771 | } |
772 | Event::Start(Tag::Heading { level, .. }) => { |
773 | register_header = Some(level as u32); |
774 | } |
775 | Event::Text(ref s) if register_header.is_some() => { |
776 | let level = register_header.unwrap(); |
777 | tests.visit_header(s, level); |
778 | register_header = None; |
779 | } |
780 | _ => {} |
781 | } |
782 | } |
783 | } |
784 | |
/// Context needed to emit the `INVALID_CODEBLOCK_ATTRIBUTES` lint.
pub(crate) struct ExtraInfo<'tcx> {
    /// Item whose HIR id the lint is attached to.
    def_id: LocalDefId,
    /// Span the lint is reported at.
    sp: Span,
    /// Type context used to emit the lint.
    tcx: TyCtxt<'tcx>,
}
790 | |
791 | impl<'tcx> ExtraInfo<'tcx> { |
792 | pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> { |
793 | ExtraInfo { def_id, sp, tcx } |
794 | } |
795 | |
796 | fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) { |
797 | self.tcx.node_span_lint( |
798 | crate::lint::INVALID_CODEBLOCK_ATTRIBUTES, |
799 | self.tcx.local_def_id_to_hir_id(self.def_id), |
800 | self.sp, |
801 | |lint| { |
802 | lint.primary_message(msg); |
803 | }, |
804 | ); |
805 | } |
806 | |
807 | fn error_invalid_codeblock_attr_with_help( |
808 | &self, |
809 | msg: impl Into<DiagMessage>, |
810 | f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>), |
811 | ) { |
812 | self.tcx.node_span_lint( |
813 | crate::lint::INVALID_CODEBLOCK_ATTRIBUTES, |
814 | self.tcx.local_def_id_to_hir_id(self.def_id), |
815 | self.sp, |
816 | |lint| { |
817 | lint.primary_message(msg); |
818 | f(lint); |
819 | }, |
820 | ); |
821 | } |
822 | } |
823 | |
/// The parsed form of a fenced code block's lang string.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    // NOTE(review): presumably the unparsed lang string — confirm against the parser.
    pub(crate) original: String,
    /// When set, the rendered block may get the "should panic" tooltip.
    pub(crate) should_panic: bool,
    pub(crate) no_run: bool,
    /// Anything other than `Ignore::None` gives the rendered block the "ignore" tooltip.
    pub(crate) ignore: Ignore,
    /// Whether the block is treated as Rust code; non-Rust blocks are rendered
    /// without highlighting and are skipped by `find_testable_code`.
    pub(crate) rust: bool,
    pub(crate) test_harness: bool,
    /// When set, the rendered block may get the "compile fail" tooltip.
    pub(crate) compile_fail: bool,
    pub(crate) standalone_crate: bool,
    pub(crate) error_codes: Vec<String>,
    /// Edition given for this block; the renderer's default is used when `None`.
    pub(crate) edition: Option<Edition>,
    /// Extra CSS classes put on the rendered block.
    pub(crate) added_classes: Vec<String>,
    /// Tokens that were not recognized; for non-Rust blocks the first one is
    /// rendered as a `language-…` class.
    pub(crate) unknown: Vec<String>,
}
839 | |
/// The "ignore" state of a code block.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    All,
    /// The block is not ignored; it gets no "ignore" tooltip.
    None,
    // NOTE(review): presumably the list of targets to ignore the block on
    // (cf. `enable_per_target_ignores`) — confirm against the parser.
    Some(Vec<String>),
}
846 | |
/// This is the parser for fenced codeblocks attributes. It implements the following eBNF:
///
/// ```eBNF
/// lang-string = *(token-list / delimited-attribute-list / comment)
///
/// bareword = LEADINGCHAR *(CHAR)
/// bareword-without-leading-char = CHAR *(CHAR)
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
/// token = bareword / quoted-string
/// token-without-leading-char = bareword-without-leading-char / quoted-string
/// sep = COMMA/WS *(COMMA/WS)
/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
/// attribute-list = [sep] attribute *(sep attribute) [sep]
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
/// token-list = [sep] token *(sep token) [sep]
/// comment = OPEN_PAREN *(all characters) CLOSE_PAREN
///
/// OPEN_PAREN = "("
/// CLOSE_PAREN = ")"
/// OPEN-CURLY-BRACKET = "{"
/// CLOSE-CURLY-BRACKET = "}"
/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":"
/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces.
/// ; Comma is used to separate language tokens, so it can't be used in one.
/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
/// ; Equals is used to make key=value pairs in attribute blocks.
/// ; Backslash and grave are special Markdown characters.
/// ; Braces are used to start an attribute block.
/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" |
///        ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~"
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
/// COMMA = ","
/// DOT = "."
/// EQUAL = "="
///
/// ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
/// DIGIT = %x30-39
/// WS = %x09 / " "
/// ```
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Peekable cursor over the characters of `data` and their byte positions.
    inner: Peekable<CharIndices<'a>>,
    /// The string being parsed; tokens are returned as slices of it.
    data: &'a str,
    /// Starts out `false`.
    // NOTE(review): presumably set while inside a `{ ... }` attribute block — confirm.
    is_in_attribute_block: bool,
    /// When present, used to report errors as lints.
    extra: Option<&'a ExtraInfo<'tcx>>,
    /// Set as soon as an error has been emitted.
    is_error: bool,
}
893 | |
/// A token produced while parsing a lang string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    // NOTE(review): presumably a plain `token` of the grammar — confirm.
    LangToken(&'a str),
    /// A `.class` attribute; holds the text after the dot.
    ClassAttribute(&'a str),
    // NOTE(review): presumably a `key=value` attribute as (key, value) — confirm.
    KeyValueAttribute(&'a str, &'a str),
}
900 | |
/// Returns `true` if `c` is an ASCII letter or digit, or one of `_`, `-`, `:`
/// (the `LEADINGCHAR` class of the lang-string grammar).
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
904 | fn is_bareword_char(c: char) -> bool { |
905 | is_leading_char(c) || ".!#$%&*+/;<>?@^|~" .contains(c) |
906 | } |
/// Returns `true` if `c` separates tokens in a lang string: a space, a comma
/// or a tab (the `COMMA` and `WS` classes of the grammar).
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
910 | |
/// Half-open byte range `start..end` used to slice the string being tokenized.
struct Indices {
    /// Offset of the first byte of the slice.
    start: usize,
    /// Offset one past the last byte of the slice.
    end: usize,
}
915 | |
916 | impl<'a, 'tcx> TagIterator<'a, 'tcx> { |
917 | pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self { |
918 | Self { |
919 | inner: data.char_indices().peekable(), |
920 | data, |
921 | is_in_attribute_block: false, |
922 | extra, |
923 | is_error: false, |
924 | } |
925 | } |
926 | |
927 | fn emit_error(&mut self, err: impl Into<DiagMessage>) { |
928 | if let Some(extra) = self.extra { |
929 | extra.error_invalid_codeblock_attr(err); |
930 | } |
931 | self.is_error = true; |
932 | } |
933 | |
934 | fn skip_separators(&mut self) -> Option<usize> { |
935 | while let Some((pos, c)) = self.inner.peek() { |
936 | if !is_separator(*c) { |
937 | return Some(*pos); |
938 | } |
939 | self.inner.next(); |
940 | } |
941 | None |
942 | } |
943 | |
944 | fn parse_string(&mut self, start: usize) -> Option<Indices> { |
945 | for (pos, c) in self.inner.by_ref() { |
946 | if c == '"' { |
947 | return Some(Indices { start: start + 1, end: pos }); |
948 | } |
949 | } |
950 | self.emit_error("unclosed quote string ` \"`" ); |
951 | None |
952 | } |
953 | |
954 | fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> { |
955 | while let Some((pos, c)) = self.inner.peek().copied() { |
956 | if is_bareword_char(c) { |
957 | self.inner.next(); |
958 | } else { |
959 | let class = &self.data[start + 1..pos]; |
960 | if class.is_empty() { |
961 | self.emit_error(format!("unexpected ` {c}` character after `.`" )); |
962 | return None; |
963 | } else if self.check_after_token() { |
964 | return Some(LangStringToken::ClassAttribute(class)); |
965 | } else { |
966 | return None; |
967 | } |
968 | } |
969 | } |
970 | let class = &self.data[start + 1..]; |
971 | if class.is_empty() { |
972 | self.emit_error("missing character after `.`" ); |
973 | None |
974 | } else if self.check_after_token() { |
975 | Some(LangStringToken::ClassAttribute(class)) |
976 | } else { |
977 | None |
978 | } |
979 | } |
980 | |
981 | fn parse_token(&mut self, start: usize) -> Option<Indices> { |
982 | while let Some((pos, c)) = self.inner.peek() { |
983 | if !is_bareword_char(*c) { |
984 | return Some(Indices { start, end: *pos }); |
985 | } |
986 | self.inner.next(); |
987 | } |
988 | self.emit_error("unexpected end" ); |
989 | None |
990 | } |
991 | |
992 | fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> { |
993 | let key_indices = |
994 | if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? }; |
995 | if key_indices.start == key_indices.end { |
996 | self.emit_error("unexpected empty string as key" ); |
997 | return None; |
998 | } |
999 | |
1000 | if let Some((_, c)) = self.inner.next() { |
1001 | if c != '=' { |
1002 | self.emit_error(format!("expected `=`, found ` {}`" , c)); |
1003 | return None; |
1004 | } |
1005 | } else { |
1006 | self.emit_error("unexpected end" ); |
1007 | return None; |
1008 | } |
1009 | let value_indices = match self.inner.next() { |
1010 | Some((pos, '"' )) => self.parse_string(pos)?, |
1011 | Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?, |
1012 | Some((_, c)) => { |
1013 | self.emit_error(format!("unexpected ` {c}` character after `=`" )); |
1014 | return None; |
1015 | } |
1016 | None => { |
1017 | self.emit_error("expected value after `=`" ); |
1018 | return None; |
1019 | } |
1020 | }; |
1021 | if value_indices.start == value_indices.end { |
1022 | self.emit_error("unexpected empty string as value" ); |
1023 | None |
1024 | } else if self.check_after_token() { |
1025 | Some(LangStringToken::KeyValueAttribute( |
1026 | &self.data[key_indices.start..key_indices.end], |
1027 | &self.data[value_indices.start..value_indices.end], |
1028 | )) |
1029 | } else { |
1030 | None |
1031 | } |
1032 | } |
1033 | |
1034 | /// Returns `false` if an error was emitted. |
1035 | fn check_after_token(&mut self) -> bool { |
1036 | if let Some((_, c)) = self.inner.peek().copied() { |
1037 | if c == '}' || is_separator(c) || c == '(' { |
1038 | true |
1039 | } else { |
1040 | self.emit_error(format!("unexpected ` {c}` character" )); |
1041 | false |
1042 | } |
1043 | } else { |
1044 | // The error will be caught on the next iteration. |
1045 | true |
1046 | } |
1047 | } |
1048 | |
1049 | fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> { |
1050 | if let Some((pos, c)) = self.inner.next() { |
1051 | if c == '}' { |
1052 | self.is_in_attribute_block = false; |
1053 | return self.next(); |
1054 | } else if c == '.' { |
1055 | return self.parse_class(pos); |
1056 | } else if c == '"' || is_leading_char(c) { |
1057 | return self.parse_key_value(c, pos); |
1058 | } else { |
1059 | self.emit_error(format!("unexpected character ` {c}`" )); |
1060 | return None; |
1061 | } |
1062 | } |
1063 | self.emit_error("unclosed attribute block (`{}`): missing `}` at the end" ); |
1064 | None |
1065 | } |
1066 | |
1067 | /// Returns `false` if an error was emitted. |
1068 | fn skip_paren_block(&mut self) -> bool { |
1069 | for (_, c) in self.inner.by_ref() { |
1070 | if c == ')' { |
1071 | return true; |
1072 | } |
1073 | } |
1074 | self.emit_error("unclosed comment: missing `)` at the end" ); |
1075 | false |
1076 | } |
1077 | |
1078 | fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> { |
1079 | while let Some((pos, c)) = self.inner.next() { |
1080 | if c == '"' { |
1081 | if pos != start { |
1082 | self.emit_error("expected ` `, `{` or `,` found ` \"`" ); |
1083 | return None; |
1084 | } |
1085 | let indices = self.parse_string(pos)?; |
1086 | if let Some((_, c)) = self.inner.peek().copied() |
1087 | && c != '{' |
1088 | && !is_separator(c) |
1089 | && c != '(' |
1090 | { |
1091 | self.emit_error(format!("expected ` `, ` {{` or `,` after ` \"`, found ` {c}`" )); |
1092 | return None; |
1093 | } |
1094 | return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end])); |
1095 | } else if c == '{' { |
1096 | self.is_in_attribute_block = true; |
1097 | return self.next(); |
1098 | } else if is_separator(c) { |
1099 | if pos != start { |
1100 | return Some(LangStringToken::LangToken(&self.data[start..pos])); |
1101 | } |
1102 | return self.next(); |
1103 | } else if c == '(' { |
1104 | if !self.skip_paren_block() { |
1105 | return None; |
1106 | } |
1107 | if pos != start { |
1108 | return Some(LangStringToken::LangToken(&self.data[start..pos])); |
1109 | } |
1110 | return self.next(); |
1111 | } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c)) |
1112 | { |
1113 | continue; |
1114 | } else { |
1115 | self.emit_error(format!("unexpected character ` {c}`" )); |
1116 | return None; |
1117 | } |
1118 | } |
1119 | let token = &self.data[start..]; |
1120 | if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) } |
1121 | } |
1122 | } |
1123 | |
1124 | impl<'a> Iterator for TagIterator<'a, '_> { |
1125 | type Item = LangStringToken<'a>; |
1126 | |
1127 | fn next(&mut self) -> Option<Self::Item> { |
1128 | if self.is_error { |
1129 | return None; |
1130 | } |
1131 | let Some(start: usize) = self.skip_separators() else { |
1132 | if self.is_in_attribute_block { |
1133 | self.emit_error(err:"unclosed attribute block (`{}`): missing `}` at the end" ); |
1134 | } |
1135 | return None; |
1136 | }; |
1137 | if self.is_in_attribute_block { |
1138 | self.parse_in_attribute_block() |
1139 | } else { |
1140 | self.parse_outside_attribute_block(start) |
1141 | } |
1142 | } |
1143 | } |
1144 | |
1145 | impl Default for LangString { |
1146 | fn default() -> Self { |
1147 | Self { |
1148 | original: String::new(), |
1149 | should_panic: false, |
1150 | no_run: false, |
1151 | ignore: Ignore::None, |
1152 | rust: true, |
1153 | test_harness: false, |
1154 | compile_fail: false, |
1155 | standalone_crate: false, |
1156 | error_codes: Vec::new(), |
1157 | edition: None, |
1158 | added_classes: Vec::new(), |
1159 | unknown: Vec::new(), |
1160 | } |
1161 | } |
1162 | } |
1163 | |
1164 | impl LangString { |
1165 | fn parse_without_check( |
1166 | string: &str, |
1167 | allow_error_code_check: ErrorCodes, |
1168 | enable_per_target_ignores: bool, |
1169 | ) -> Self { |
1170 | Self::parse(string, allow_error_code_check, enable_per_target_ignores, None) |
1171 | } |
1172 | |
1173 | fn parse( |
1174 | string: &str, |
1175 | allow_error_code_check: ErrorCodes, |
1176 | enable_per_target_ignores: bool, |
1177 | extra: Option<&ExtraInfo<'_>>, |
1178 | ) -> Self { |
1179 | let allow_error_code_check = allow_error_code_check.as_bool(); |
1180 | let mut seen_rust_tags = false; |
1181 | let mut seen_other_tags = false; |
1182 | let mut seen_custom_tag = false; |
1183 | let mut data = LangString::default(); |
1184 | let mut ignores = vec![]; |
1185 | |
1186 | data.original = string.to_owned(); |
1187 | |
1188 | let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| { |
1189 | for token in tokens { |
1190 | match token { |
1191 | LangStringToken::LangToken("should_panic" ) => { |
1192 | data.should_panic = true; |
1193 | seen_rust_tags = !seen_other_tags; |
1194 | } |
1195 | LangStringToken::LangToken("no_run" ) => { |
1196 | data.no_run = true; |
1197 | seen_rust_tags = !seen_other_tags; |
1198 | } |
1199 | LangStringToken::LangToken("ignore" ) => { |
1200 | data.ignore = Ignore::All; |
1201 | seen_rust_tags = !seen_other_tags; |
1202 | } |
1203 | LangStringToken::LangToken(x) |
1204 | if let Some(ignore) = x.strip_prefix("ignore-" ) => |
1205 | { |
1206 | if enable_per_target_ignores { |
1207 | ignores.push(ignore.to_owned()); |
1208 | seen_rust_tags = !seen_other_tags; |
1209 | } |
1210 | } |
1211 | LangStringToken::LangToken("rust" ) => { |
1212 | data.rust = true; |
1213 | seen_rust_tags = true; |
1214 | } |
1215 | LangStringToken::LangToken("custom" ) => { |
1216 | seen_custom_tag = true; |
1217 | } |
1218 | LangStringToken::LangToken("test_harness" ) => { |
1219 | data.test_harness = true; |
1220 | seen_rust_tags = !seen_other_tags || seen_rust_tags; |
1221 | } |
1222 | LangStringToken::LangToken("compile_fail" ) => { |
1223 | data.compile_fail = true; |
1224 | seen_rust_tags = !seen_other_tags || seen_rust_tags; |
1225 | data.no_run = true; |
1226 | } |
1227 | LangStringToken::LangToken("standalone_crate" ) => { |
1228 | data.standalone_crate = true; |
1229 | seen_rust_tags = !seen_other_tags || seen_rust_tags; |
1230 | } |
1231 | LangStringToken::LangToken(x) |
1232 | if let Some(edition) = x.strip_prefix("edition" ) => |
1233 | { |
1234 | data.edition = edition.parse::<Edition>().ok(); |
1235 | } |
1236 | LangStringToken::LangToken(x) |
1237 | if let Some(edition) = x.strip_prefix("rust" ) |
1238 | && edition.parse::<Edition>().is_ok() |
1239 | && let Some(extra) = extra => |
1240 | { |
1241 | extra.error_invalid_codeblock_attr_with_help( |
1242 | format!("unknown attribute ` {x}`" ), |
1243 | |lint| { |
1244 | lint.help(format!( |
1245 | "there is an attribute with a similar name: `edition {edition}`" |
1246 | )); |
1247 | }, |
1248 | ); |
1249 | } |
1250 | LangStringToken::LangToken(x) |
1251 | if allow_error_code_check |
1252 | && let Some(error_code) = x.strip_prefix('E' ) |
1253 | && error_code.len() == 4 => |
1254 | { |
1255 | if error_code.parse::<u32>().is_ok() { |
1256 | data.error_codes.push(x.to_owned()); |
1257 | seen_rust_tags = !seen_other_tags || seen_rust_tags; |
1258 | } else { |
1259 | seen_other_tags = true; |
1260 | } |
1261 | } |
1262 | LangStringToken::LangToken(x) if let Some(extra) = extra => { |
1263 | if let Some(help) = match x.to_lowercase().as_str() { |
1264 | "compile-fail" | "compile_fail" | "compilefail" => Some( |
1265 | "use `compile_fail` to invert the results of this test, so that it \ |
1266 | passes if it cannot be compiled and fails if it can" , |
1267 | ), |
1268 | "should-panic" | "should_panic" | "shouldpanic" => Some( |
1269 | "use `should_panic` to invert the results of this test, so that if \ |
1270 | passes if it panics and fails if it does not" , |
1271 | ), |
1272 | "no-run" | "no_run" | "norun" => Some( |
1273 | "use `no_run` to compile, but not run, the code sample during \ |
1274 | testing" , |
1275 | ), |
1276 | "test-harness" | "test_harness" | "testharness" => Some( |
1277 | "use `test_harness` to run functions marked `#[test]` instead of a \ |
1278 | potentially-implicit `main` function" , |
1279 | ), |
1280 | "standalone" | "standalone_crate" | "standalone-crate" |
1281 | if extra.sp.at_least_rust_2024() => |
1282 | { |
1283 | Some( |
1284 | "use `standalone_crate` to compile this code block \ |
1285 | separately" , |
1286 | ) |
1287 | } |
1288 | _ => None, |
1289 | } { |
1290 | extra.error_invalid_codeblock_attr_with_help( |
1291 | format!("unknown attribute ` {x}`" ), |
1292 | |lint| { |
1293 | lint.help(help).help( |
1294 | "this code block may be skipped during testing, \ |
1295 | because unknown attributes are treated as markers for \ |
1296 | code samples written in other programming languages, \ |
1297 | unless it is also explicitly marked as `rust`" , |
1298 | ); |
1299 | }, |
1300 | ); |
1301 | } |
1302 | seen_other_tags = true; |
1303 | data.unknown.push(x.to_owned()); |
1304 | } |
1305 | LangStringToken::LangToken(x) => { |
1306 | seen_other_tags = true; |
1307 | data.unknown.push(x.to_owned()); |
1308 | } |
1309 | LangStringToken::KeyValueAttribute("class" , value) => { |
1310 | data.added_classes.push(value.to_owned()); |
1311 | } |
1312 | LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => { |
1313 | extra |
1314 | .error_invalid_codeblock_attr(format!("unsupported attribute ` {key}`" )); |
1315 | } |
1316 | LangStringToken::ClassAttribute(class) => { |
1317 | data.added_classes.push(class.to_owned()); |
1318 | } |
1319 | _ => {} |
1320 | } |
1321 | } |
1322 | }; |
1323 | |
1324 | let mut tag_iter = TagIterator::new(string, extra); |
1325 | call(&mut tag_iter); |
1326 | |
1327 | // ignore-foo overrides ignore |
1328 | if !ignores.is_empty() { |
1329 | data.ignore = Ignore::Some(ignores); |
1330 | } |
1331 | |
1332 | data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error; |
1333 | |
1334 | data |
1335 | } |
1336 | } |
1337 | |
1338 | impl<'a> Markdown<'a> { |
1339 | pub fn into_string(self) -> String { |
1340 | // This is actually common enough to special-case |
1341 | if self.content.is_empty() { |
1342 | return String::new(); |
1343 | } |
1344 | |
1345 | let mut s = String::with_capacity(self.content.len() * 3 / 2); |
1346 | html::push_html(&mut s, self.into_iter()); |
1347 | |
1348 | s |
1349 | } |
1350 | |
1351 | fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> { |
1352 | let Markdown { |
1353 | content: md, |
1354 | links, |
1355 | ids, |
1356 | error_codes: codes, |
1357 | edition, |
1358 | playground, |
1359 | heading_offset, |
1360 | } = self; |
1361 | |
1362 | let replacer = move |broken_link: BrokenLink<'_>| { |
1363 | links |
1364 | .iter() |
1365 | .find(|link| *link.original_text == *broken_link.reference) |
1366 | .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into())) |
1367 | }; |
1368 | |
1369 | let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer)); |
1370 | let p = p.into_offset_iter(); |
1371 | |
1372 | ids.handle_footnotes(|ids, existing_footnotes| { |
1373 | let p = HeadingLinks::new(p, None, ids, heading_offset); |
1374 | let p = SpannedLinkReplacer::new(p, links); |
1375 | let p = footnotes::Footnotes::new(p, existing_footnotes); |
1376 | let p = TableWrapper::new(p.map(|(ev, _)| ev)); |
1377 | CodeBlocks::new(p, codes, edition, playground) |
1378 | }) |
1379 | } |
1380 | |
1381 | /// Convert markdown to (summary, remaining) HTML. |
1382 | /// |
1383 | /// - The summary is the first top-level Markdown element (usually a paragraph, but potentially |
1384 | /// any block). |
1385 | /// - The remaining docs contain everything after the summary. |
1386 | pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) { |
1387 | if self.content.is_empty() { |
1388 | return (None, None); |
1389 | } |
1390 | let mut p = self.into_iter(); |
1391 | |
1392 | let mut event_level = 0; |
1393 | let mut summary_events = Vec::new(); |
1394 | let mut get_next_tag = false; |
1395 | |
1396 | let mut end_of_summary = false; |
1397 | while let Some(event) = p.next() { |
1398 | match event { |
1399 | Event::Start(_) => event_level += 1, |
1400 | Event::End(kind) => { |
1401 | event_level -= 1; |
1402 | if event_level == 0 { |
1403 | // We're back at the "top" so it means we're done with the summary. |
1404 | end_of_summary = true; |
1405 | // We surround tables with `<div>` HTML tags so this is a special case. |
1406 | get_next_tag = kind == TagEnd::Table; |
1407 | } |
1408 | } |
1409 | _ => {} |
1410 | } |
1411 | summary_events.push(event); |
1412 | if end_of_summary { |
1413 | if get_next_tag && let Some(event) = p.next() { |
1414 | summary_events.push(event); |
1415 | } |
1416 | break; |
1417 | } |
1418 | } |
1419 | let mut summary = String::new(); |
1420 | html::push_html(&mut summary, summary_events.into_iter()); |
1421 | if summary.is_empty() { |
1422 | return (None, None); |
1423 | } |
1424 | let mut content = String::new(); |
1425 | html::push_html(&mut content, p); |
1426 | |
1427 | if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) } |
1428 | } |
1429 | } |
1430 | |
1431 | impl MarkdownWithToc<'_> { |
1432 | pub(crate) fn into_parts(self) -> (Toc, String) { |
1433 | let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } = |
1434 | self; |
1435 | |
1436 | // This is actually common enough to special-case |
1437 | if md.is_empty() { |
1438 | return (Toc { entries: Vec::new() }, String::new()); |
1439 | } |
1440 | let mut replacer = |broken_link: BrokenLink<'_>| { |
1441 | links |
1442 | .iter() |
1443 | .find(|link| *link.original_text == *broken_link.reference) |
1444 | .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into())) |
1445 | }; |
1446 | |
1447 | let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer)); |
1448 | let p = p.into_offset_iter(); |
1449 | |
1450 | let mut s = String::with_capacity(md.len() * 3 / 2); |
1451 | |
1452 | let mut toc = TocBuilder::new(); |
1453 | |
1454 | ids.handle_footnotes(|ids, existing_footnotes| { |
1455 | let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1); |
1456 | let p = footnotes::Footnotes::new(p, existing_footnotes); |
1457 | let p = TableWrapper::new(p.map(|(ev, _)| ev)); |
1458 | let p = CodeBlocks::new(p, codes, edition, playground); |
1459 | html::push_html(&mut s, p); |
1460 | }); |
1461 | |
1462 | (toc.into_toc(), s) |
1463 | } |
1464 | pub(crate) fn into_string(self) -> String { |
1465 | let (toc, s) = self.into_parts(); |
1466 | format!("<nav id= \"rustdoc \"> {toc}</nav> {s}" , toc = toc.print()) |
1467 | } |
1468 | } |
1469 | |
1470 | impl MarkdownItemInfo<'_> { |
1471 | pub(crate) fn into_string(self) -> String { |
1472 | let MarkdownItemInfo(md, ids) = self; |
1473 | |
1474 | // This is actually common enough to special-case |
1475 | if md.is_empty() { |
1476 | return String::new(); |
1477 | } |
1478 | let p = Parser::new_ext(md, main_body_opts()).into_offset_iter(); |
1479 | |
1480 | // Treat inline HTML as plain text. |
1481 | let p = p.map(|event| match event.0 { |
1482 | Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1), |
1483 | _ => event, |
1484 | }); |
1485 | |
1486 | let mut s = String::with_capacity(md.len() * 3 / 2); |
1487 | |
1488 | ids.handle_footnotes(|ids, existing_footnotes| { |
1489 | let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1); |
1490 | let p = footnotes::Footnotes::new(p, existing_footnotes); |
1491 | let p = TableWrapper::new(p.map(|(ev, _)| ev)); |
1492 | let p = p.filter(|event| { |
1493 | !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) |
1494 | }); |
1495 | html::push_html(&mut s, p); |
1496 | }); |
1497 | |
1498 | s |
1499 | } |
1500 | } |
1501 | |
1502 | impl MarkdownSummaryLine<'_> { |
1503 | pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) { |
1504 | let MarkdownSummaryLine(md, links) = self; |
1505 | // This is actually common enough to special-case |
1506 | if md.is_empty() { |
1507 | return (String::new(), false); |
1508 | } |
1509 | |
1510 | let mut replacer = |broken_link: BrokenLink<'_>| { |
1511 | links |
1512 | .iter() |
1513 | .find(|link| *link.original_text == *broken_link.reference) |
1514 | .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into())) |
1515 | }; |
1516 | |
1517 | let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer)) |
1518 | .peekable(); |
1519 | let mut summary = SummaryLine::new(p); |
1520 | |
1521 | let mut s = String::new(); |
1522 | |
1523 | let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| { |
1524 | !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) |
1525 | }); |
1526 | |
1527 | html::push_html(&mut s, without_paragraphs); |
1528 | |
1529 | let has_more_content = |
1530 | matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0; |
1531 | |
1532 | (s, has_more_content) |
1533 | } |
1534 | |
1535 | pub(crate) fn into_string(self) -> String { |
1536 | self.into_string_with_has_more_content().0 |
1537 | } |
1538 | } |
1539 | |
1540 | /// Renders a subset of Markdown in the first paragraph of the provided Markdown. |
1541 | /// |
1542 | /// - *Italics*, **bold**, and `inline code` styles **are** rendered. |
1543 | /// - Headings and links are stripped (though the text *is* rendered). |
1544 | /// - HTML, code blocks, and everything else are ignored. |
1545 | /// |
1546 | /// Returns a tuple of the rendered HTML string and whether the output was shortened |
1547 | /// due to the provided `length_limit`. |
1548 | fn markdown_summary_with_limit( |
1549 | md: &str, |
1550 | link_names: &[RenderedLink], |
1551 | length_limit: usize, |
1552 | ) -> (String, bool) { |
1553 | if md.is_empty() { |
1554 | return (String::new(), false); |
1555 | } |
1556 | |
1557 | let mut replacer = |broken_link: BrokenLink<'_>| { |
1558 | link_names |
1559 | .iter() |
1560 | .find(|link| *link.original_text == *broken_link.reference) |
1561 | .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into())) |
1562 | }; |
1563 | |
1564 | let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer)); |
1565 | let mut p = LinkReplacer::new(p, link_names); |
1566 | |
1567 | let mut buf = HtmlWithLimit::new(length_limit); |
1568 | let mut stopped_early = false; |
1569 | let _ = p.try_for_each(|event| { |
1570 | match &event { |
1571 | Event::Text(text) => { |
1572 | let r = |
1573 | text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word)); |
1574 | if r.is_break() { |
1575 | stopped_early = true; |
1576 | } |
1577 | return r; |
1578 | } |
1579 | Event::Code(code) => { |
1580 | buf.open_tag("code" ); |
1581 | let r = buf.push(code); |
1582 | if r.is_break() { |
1583 | stopped_early = true; |
1584 | } else { |
1585 | buf.close_tag(); |
1586 | } |
1587 | return r; |
1588 | } |
1589 | Event::Start(tag) => match tag { |
1590 | Tag::Emphasis => buf.open_tag("em" ), |
1591 | Tag::Strong => buf.open_tag("strong" ), |
1592 | Tag::CodeBlock(..) => return ControlFlow::Break(()), |
1593 | _ => {} |
1594 | }, |
1595 | Event::End(tag) => match tag { |
1596 | TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(), |
1597 | TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()), |
1598 | _ => {} |
1599 | }, |
1600 | Event::HardBreak | Event::SoftBreak => buf.push(" " )?, |
1601 | _ => {} |
1602 | }; |
1603 | ControlFlow::Continue(()) |
1604 | }); |
1605 | |
1606 | (buf.finish(), stopped_early) |
1607 | } |
1608 | |
1609 | /// Renders a shortened first paragraph of the given Markdown as a subset of Markdown, |
1610 | /// making it suitable for contexts like the search index. |
1611 | /// |
1612 | /// Will shorten to 59 or 60 characters, including an ellipsis (…) if it was shortened. |
1613 | /// |
1614 | /// See [`markdown_summary_with_limit`] for details about what is rendered and what is not. |
1615 | pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String { |
1616 | let (mut s: String, was_shortened: bool) = markdown_summary_with_limit(md:markdown, link_names, length_limit:59); |
1617 | |
1618 | if was_shortened { |
1619 | s.push(ch:'…' ); |
1620 | } |
1621 | |
1622 | s |
1623 | } |
1624 | |
1625 | /// Renders the first paragraph of the provided markdown as plain text. |
1626 | /// Useful for alt-text. |
1627 | /// |
1628 | /// - Headings, links, and formatting are stripped. |
1629 | /// - Inline code is rendered as-is, surrounded by backticks. |
1630 | /// - HTML and code blocks are ignored. |
1631 | pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String { |
1632 | if md.is_empty() { |
1633 | return String::new(); |
1634 | } |
1635 | |
1636 | let mut s: String = String::with_capacity(md.len() * 3 / 2); |
1637 | |
1638 | let mut replacer: impl Fn({unknown}) -> Option<…> = |broken_link: BrokenLink<'_>| { |
1639 | link_namesOption<&RenderedLink> |
1640 | .iter() |
1641 | .find(|link: &&RenderedLink| *link.original_text == *broken_link.reference) |
1642 | .map(|link: &RenderedLink| (link.href.as_str().into(), link.tooltip.as_str().into())) |
1643 | }; |
1644 | |
1645 | let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer)); |
1646 | |
1647 | plain_text_from_events(events:p, &mut s); |
1648 | |
1649 | s |
1650 | } |
1651 | |
1652 | pub(crate) fn plain_text_from_events<'a>( |
1653 | events: impl Iterator<Item = pulldown_cmark::Event<'a>>, |
1654 | s: &mut String, |
1655 | ) { |
1656 | for event in events { |
1657 | match &event { |
1658 | Event::Text(text: &str) => s.push_str(string:text), |
1659 | Event::Code(code: &str) => { |
1660 | s.push(ch:'`' ); |
1661 | s.push_str(string:code); |
1662 | s.push(ch:'`' ); |
1663 | } |
1664 | Event::HardBreak | Event::SoftBreak => s.push(ch:' ' ), |
1665 | Event::Start(Tag::CodeBlock(..)) => break, |
1666 | Event::End(TagEnd::Paragraph) => break, |
1667 | Event::End(TagEnd::Heading(..)) => break, |
1668 | _ => (), |
1669 | } |
1670 | } |
1671 | } |
1672 | |
1673 | pub(crate) fn html_text_from_events<'a>( |
1674 | events: impl Iterator<Item = pulldown_cmark::Event<'a>>, |
1675 | s: &mut String, |
1676 | ) { |
1677 | for event in events { |
1678 | match &event { |
1679 | Event::Text(text: &str) => { |
1680 | write!(s, " {}" , EscapeBodyText(text)).expect(msg:"string alloc infallible" ) |
1681 | } |
1682 | Event::Code(code: &str) => { |
1683 | s.push_str(string:"<code>" ); |
1684 | write!(s, " {}" , EscapeBodyText(code)).expect(msg:"string alloc infallible" ); |
1685 | s.push_str(string:"</code>" ); |
1686 | } |
1687 | Event::HardBreak | Event::SoftBreak => s.push(ch:' ' ), |
1688 | Event::Start(Tag::CodeBlock(..)) => break, |
1689 | Event::End(TagEnd::Paragraph) => break, |
1690 | Event::End(TagEnd::Heading(..)) => break, |
1691 | _ => (), |
1692 | } |
1693 | } |
1694 | } |
1695 | |
/// A link found in Markdown text, as handed to the `preprocess_link` callback of
/// `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// Kind of link, as classified by pulldown-cmark.
    pub kind: LinkType,
    /// Text of the link (presumably its destination - confirm against `markdown_links`).
    pub link: String,
    /// Range in the Markdown source associated with the link.
    pub range: MarkdownLinkRange,
}
1702 | |
/// Range in the Markdown source attached to a link, along with how precisely it was located.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// Normally, markdown link warnings point only at the destination.
    Destination(Range<usize>),
    /// In some cases, it's not possible to point at the destination.
    /// Usually, this happens because backslashes `\\` are used.
    /// When that happens, point at the whole link, and don't provide structured suggestions.
    WholeLink(Range<usize>),
}
1712 | |
1713 | impl MarkdownLinkRange { |
1714 | /// Extracts the inner range. |
1715 | pub fn inner_range(&self) -> &Range<usize> { |
1716 | match self { |
1717 | MarkdownLinkRange::Destination(range: &Range) => range, |
1718 | MarkdownLinkRange::WholeLink(range: &Range) => range, |
1719 | } |
1720 | } |
1721 | } |
1722 | |
1723 | pub(crate) fn markdown_links<'md, R>( |
1724 | md: &'md str, |
1725 | preprocess_link: impl Fn(MarkdownLink) -> Option<R>, |
1726 | ) -> Vec<R> { |
1727 | use itertools::Itertools; |
1728 | if md.is_empty() { |
1729 | return vec![]; |
1730 | } |
1731 | |
1732 | // FIXME: remove this function once pulldown_cmark can provide spans for link definitions. |
1733 | let locate = |s: &str, fallback: Range<usize>| unsafe { |
1734 | let s_start = s.as_ptr(); |
1735 | let s_end = s_start.add(s.len()); |
1736 | let md_start = md.as_ptr(); |
1737 | let md_end = md_start.add(md.len()); |
1738 | if md_start <= s_start && s_end <= md_end { |
1739 | let start = s_start.offset_from(md_start) as usize; |
1740 | let end = s_end.offset_from(md_start) as usize; |
1741 | MarkdownLinkRange::Destination(start..end) |
1742 | } else { |
1743 | MarkdownLinkRange::WholeLink(fallback) |
1744 | } |
1745 | }; |
1746 | |
1747 | let span_for_link = |link: &CowStr<'_>, span: Range<usize>| { |
1748 | // For diagnostics, we want to underline the link's definition but `span` will point at |
1749 | // where the link is used. This is a problem for reference-style links, where the definition |
1750 | // is separate from the usage. |
1751 | |
1752 | match link { |
1753 | // `Borrowed` variant means the string (the link's destination) may come directly from |
1754 | // the markdown text and we can locate the original link destination. |
1755 | // NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources, |
1756 | // so `locate()` can fall back to use `span`. |
1757 | CowStr::Borrowed(s) => locate(s, span), |
1758 | |
1759 | // For anything else, we can only use the provided range. |
1760 | CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span), |
1761 | } |
1762 | }; |
1763 | |
1764 | let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| { |
1765 | // We want to underline the link's definition, but `span` will point at the entire refdef. |
1766 | // Skip the label, then try to find the entire URL. |
1767 | let mut square_brace_count = 0; |
1768 | let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate(); |
1769 | for (_i, c) in &mut iter { |
1770 | match c { |
1771 | b':' if square_brace_count == 0 => break, |
1772 | b'[' => square_brace_count += 1, |
1773 | b']' => square_brace_count -= 1, |
1774 | _ => {} |
1775 | } |
1776 | } |
1777 | while let Some((i, c)) = iter.next() { |
1778 | if c == b'<' { |
1779 | while let Some((j, c)) = iter.next() { |
1780 | match c { |
1781 | b' \\' => { |
1782 | let _ = iter.next(); |
1783 | } |
1784 | b'>' => { |
1785 | return MarkdownLinkRange::Destination( |
1786 | i + 1 + span.start..j + span.start, |
1787 | ); |
1788 | } |
1789 | _ => {} |
1790 | } |
1791 | } |
1792 | } else if !c.is_ascii_whitespace() { |
1793 | for (j, c) in iter.by_ref() { |
1794 | if c.is_ascii_whitespace() { |
1795 | return MarkdownLinkRange::Destination(i + span.start..j + span.start); |
1796 | } |
1797 | } |
1798 | return MarkdownLinkRange::Destination(i + span.start..span.end); |
1799 | } |
1800 | } |
1801 | span_for_link(link, span) |
1802 | }; |
1803 | |
1804 | let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| { |
1805 | let mut open_brace = !0; |
1806 | let mut close_brace = !0; |
1807 | for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() { |
1808 | let i = i + span.start; |
1809 | if b == close { |
1810 | close_brace = i; |
1811 | break; |
1812 | } |
1813 | } |
1814 | if close_brace < span.start || close_brace >= span.end { |
1815 | return MarkdownLinkRange::WholeLink(span); |
1816 | } |
1817 | let mut nesting = 1; |
1818 | for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() { |
1819 | let i = i + span.start; |
1820 | if b == close { |
1821 | nesting += 1; |
1822 | } |
1823 | if b == open { |
1824 | nesting -= 1; |
1825 | } |
1826 | if nesting == 0 { |
1827 | open_brace = i; |
1828 | break; |
1829 | } |
1830 | } |
1831 | assert!(open_brace != close_brace); |
1832 | if open_brace < span.start || open_brace >= span.end { |
1833 | return MarkdownLinkRange::WholeLink(span); |
1834 | } |
1835 | // do not actually include braces in the span |
1836 | let range = (open_brace + 1)..close_brace; |
1837 | MarkdownLinkRange::Destination(range) |
1838 | }; |
1839 | |
1840 | let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| { |
1841 | let mut open_brace = !0; |
1842 | let mut close_brace = !0; |
1843 | for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() { |
1844 | let i = i + span.start; |
1845 | if b == open { |
1846 | open_brace = i; |
1847 | break; |
1848 | } |
1849 | } |
1850 | if open_brace < span.start || open_brace >= span.end { |
1851 | return MarkdownLinkRange::WholeLink(span); |
1852 | } |
1853 | let mut nesting = 0; |
1854 | for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() { |
1855 | let i = i + open_brace; |
1856 | if b == close { |
1857 | nesting -= 1; |
1858 | } |
1859 | if b == open { |
1860 | nesting += 1; |
1861 | } |
1862 | if nesting == 0 { |
1863 | close_brace = i; |
1864 | break; |
1865 | } |
1866 | } |
1867 | assert!(open_brace != close_brace); |
1868 | if open_brace < span.start || open_brace >= span.end { |
1869 | return MarkdownLinkRange::WholeLink(span); |
1870 | } |
1871 | // do not actually include braces in the span |
1872 | let range = (open_brace + 1)..close_brace; |
1873 | MarkdownLinkRange::Destination(range) |
1874 | }; |
1875 | |
1876 | let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "" .into())); |
1877 | let event_iter = Parser::new_with_broken_link_callback( |
1878 | md, |
1879 | main_body_opts(), |
1880 | Some(&mut broken_link_callback), |
1881 | ) |
1882 | .into_offset_iter(); |
1883 | let mut links = Vec::new(); |
1884 | |
1885 | let mut refdefs = FxIndexMap::default(); |
1886 | for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) { |
1887 | refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone())); |
1888 | } |
1889 | |
1890 | for (event, span) in event_iter { |
1891 | match event { |
1892 | Event::Start(Tag::Link { link_type, dest_url, id, .. }) |
1893 | if may_be_doc_link(link_type) => |
1894 | { |
1895 | let range = match link_type { |
1896 | // Link is pulled from the link itself. |
1897 | LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { |
1898 | span_for_offset_backward(span, b'[' , b']' ) |
1899 | } |
1900 | LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[' , b']' ), |
1901 | LinkType::Inline => span_for_offset_backward(span, b'(' , b')' ), |
1902 | // Link is pulled from elsewhere in the document. |
1903 | LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { |
1904 | if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) { |
1905 | *is_used = true; |
1906 | span_for_refdef(&CowStr::from(&dest_url[..]), span.clone()) |
1907 | } else { |
1908 | span_for_link(&dest_url, span) |
1909 | } |
1910 | } |
1911 | LinkType::Autolink | LinkType::Email => unreachable!(), |
1912 | }; |
1913 | |
1914 | if let Some(link) = preprocess_link(MarkdownLink { |
1915 | kind: link_type, |
1916 | link: dest_url.into_string(), |
1917 | range, |
1918 | }) { |
1919 | links.push(link); |
1920 | } |
1921 | } |
1922 | _ => {} |
1923 | } |
1924 | } |
1925 | |
1926 | for (_label, (is_used, dest_url, span)) in refdefs.into_iter() { |
1927 | if !is_used |
1928 | && let Some(link) = preprocess_link(MarkdownLink { |
1929 | kind: LinkType::Reference, |
1930 | range: span_for_refdef(&CowStr::from(&dest_url[..]), span), |
1931 | link: dest_url, |
1932 | }) |
1933 | { |
1934 | links.push(link); |
1935 | } |
1936 | } |
1937 | |
1938 | links |
1939 | } |
1940 | |
/// A code block located in a markdown document, as reported by `rust_code_blocks`.
///
/// Both ranges are byte offsets into the markdown string that was parsed.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// The range in the markdown that the code block occupies. Note that this includes the fences
    /// for fenced code blocks.
    pub(crate) range: Range<usize>,
    /// The range in the markdown that the code within the code block occupies.
    pub(crate) code: Range<usize>,
    /// `true` when the block was written with fences, `false` when it is an indented block.
    pub(crate) is_fenced: bool,
    /// The parsed info string of a fenced block; the default `LangString` when the fence has no
    /// info string or the block is indented.
    pub(crate) lang_string: LangString,
}
1951 | |
1952 | /// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or |
1953 | /// untagged (and assumed to be rust). |
1954 | pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> { |
1955 | let mut code_blocks = vec![]; |
1956 | |
1957 | if md.is_empty() { |
1958 | return code_blocks; |
1959 | } |
1960 | |
1961 | let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter(); |
1962 | |
1963 | while let Some((event, offset)) = p.next() { |
1964 | if let Event::Start(Tag::CodeBlock(syntax)) = event { |
1965 | let (lang_string, code_start, code_end, range, is_fenced) = match syntax { |
1966 | CodeBlockKind::Fenced(syntax) => { |
1967 | let syntax = syntax.as_ref(); |
1968 | let lang_string = if syntax.is_empty() { |
1969 | Default::default() |
1970 | } else { |
1971 | LangString::parse(syntax, ErrorCodes::Yes, false, Some(extra_info)) |
1972 | }; |
1973 | if !lang_string.rust { |
1974 | continue; |
1975 | } |
1976 | let (code_start, mut code_end) = match p.next() { |
1977 | Some((Event::Text(_), offset)) => (offset.start, offset.end), |
1978 | Some((_, sub_offset)) => { |
1979 | let code = Range { start: sub_offset.start, end: sub_offset.start }; |
1980 | code_blocks.push(RustCodeBlock { |
1981 | is_fenced: true, |
1982 | range: offset, |
1983 | code, |
1984 | lang_string, |
1985 | }); |
1986 | continue; |
1987 | } |
1988 | None => { |
1989 | let code = Range { start: offset.end, end: offset.end }; |
1990 | code_blocks.push(RustCodeBlock { |
1991 | is_fenced: true, |
1992 | range: offset, |
1993 | code, |
1994 | lang_string, |
1995 | }); |
1996 | continue; |
1997 | } |
1998 | }; |
1999 | while let Some((Event::Text(_), offset)) = p.next() { |
2000 | code_end = offset.end; |
2001 | } |
2002 | (lang_string, code_start, code_end, offset, true) |
2003 | } |
2004 | CodeBlockKind::Indented => { |
2005 | // The ending of the offset goes too far sometime so we reduce it by one in |
2006 | // these cases. |
2007 | if offset.end > offset.start && md.get(offset.end..=offset.end) == Some(" \n" ) { |
2008 | ( |
2009 | LangString::default(), |
2010 | offset.start, |
2011 | offset.end, |
2012 | Range { start: offset.start, end: offset.end - 1 }, |
2013 | false, |
2014 | ) |
2015 | } else { |
2016 | (LangString::default(), offset.start, offset.end, offset, false) |
2017 | } |
2018 | } |
2019 | }; |
2020 | |
2021 | code_blocks.push(RustCodeBlock { |
2022 | is_fenced, |
2023 | range, |
2024 | code: Range { start: code_start, end: code_end }, |
2025 | lang_string, |
2026 | }); |
2027 | } |
2028 | } |
2029 | |
2030 | code_blocks |
2031 | } |
2032 | |
/// Hands out unique ID strings (see `IdMap::derive`) and carries a shared counter that is passed
/// to footnote handling (see `IdMap::handle_footnotes`).
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// For every ID registered so far, the numeric suffix `derive` appends the next time that
    /// same ID is requested.
    map: FxHashMap<String, usize>,
    /// Counter shared through an `Arc`; `handle_footnotes` gives closures a `Weak` handle to it
    /// and `clear` replaces it with a new counter starting at zero.
    // NOTE(review): presumably the number of footnotes already emitted — confirm with the users
    // of `handle_footnotes`.
    existing_footnotes: Arc<AtomicUsize>,
}
2038 | |
/// Tells whether `id` is one of the element IDs that rustdoc itself already uses on its pages.
///
/// The comparison is exact and case-sensitive.
fn is_default_id(id: &str) -> bool {
    const RESERVED_IDS: &[&str] = &[
        // IDs used in JavaScript.
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        // IDs used in HTML generated in Rust (including the ones used in tera template files).
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        // IDs used by rustdoc sections (but still generated by rustdoc).
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    RESERVED_IDS.contains(&id)
}
2093 | |
2094 | impl IdMap { |
2095 | pub fn new() -> Self { |
2096 | IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) } |
2097 | } |
2098 | |
2099 | pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String { |
2100 | let id = match self.map.get_mut(candidate.as_ref()) { |
2101 | None => { |
2102 | let candidate = candidate.to_string(); |
2103 | if is_default_id(&candidate) { |
2104 | let id = format!(" {}- {}" , candidate, 1); |
2105 | self.map.insert(candidate, 2); |
2106 | id |
2107 | } else { |
2108 | candidate |
2109 | } |
2110 | } |
2111 | Some(a) => { |
2112 | let id = format!(" {}- {}" , candidate.as_ref(), *a); |
2113 | *a += 1; |
2114 | id |
2115 | } |
2116 | }; |
2117 | |
2118 | self.map.insert(id.clone(), 1); |
2119 | id |
2120 | } |
2121 | |
2122 | /// Method to handle `existing_footnotes` increment automatically (to prevent forgetting |
2123 | /// about it). |
2124 | pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>( |
2125 | &'a mut self, |
2126 | closure: F, |
2127 | ) -> T { |
2128 | let existing_footnotes = Arc::downgrade(&self.existing_footnotes); |
2129 | |
2130 | closure(self, existing_footnotes) |
2131 | } |
2132 | |
2133 | pub(crate) fn clear(&mut self) { |
2134 | self.map.clear(); |
2135 | self.existing_footnotes = Arc::new(AtomicUsize::new(0)); |
2136 | } |
2137 | } |
2138 | |