1use std::borrow::Cow;
30use std::collections::VecDeque;
31use std::fmt::{self, Write};
32use std::iter::Peekable;
33use std::ops::{ControlFlow, Range};
34use std::path::PathBuf;
35use std::str::{self, CharIndices};
36use std::sync::atomic::AtomicUsize;
37use std::sync::{Arc, Weak};
38
39use pulldown_cmark::{
40 BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
41};
42use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
43use rustc_errors::{Diag, DiagMessage};
44use rustc_hir::def_id::LocalDefId;
45use rustc_middle::ty::TyCtxt;
46pub(crate) use rustc_resolve::rustdoc::main_body_opts;
47use rustc_resolve::rustdoc::may_be_doc_link;
48use rustc_span::edition::Edition;
49use rustc_span::{Span, Symbol};
50use tracing::{debug, trace};
51
52use crate::clean::RenderedLink;
53use crate::doctest;
54use crate::doctest::GlobalTestOptions;
55use crate::html::escape::{Escape, EscapeBodyText};
56use crate::html::highlight;
57use crate::html::length_limit::HtmlWithLimit;
58use crate::html::render::small_url_encode;
59use crate::html::toc::{Toc, TocBuilder};
60
61mod footnotes;
62#[cfg(test)]
63mod tests;
64
/// Largest heading level that is ever emitted: the sum of a parsed heading level and the
/// configured heading offset is clamped to this value before the `<h{level}>` tag is written.
const MAX_HEADER_LEVEL: u32 = 6;
66
67pub(crate) fn summary_opts() -> Options {
69 Options::ENABLE_TABLES
70 | Options::ENABLE_FOOTNOTES
71 | Options::ENABLE_STRIKETHROUGH
72 | Options::ENABLE_TASKLISTS
73 | Options::ENABLE_SMART_PUNCTUATION
74}
75
/// Amount by which Markdown heading levels are shifted when rendered.
///
/// The discriminant (0 for `H1` up to 5 for `H6`) is cast to `u32` and added to the parsed
/// heading level; the result is then clamped to `MAX_HEADER_LEVEL`.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2,
    H3,
    H4,
    H5,
    H6,
}
85
/// Everything needed to render one Markdown string to HTML (see `write_into`).
pub struct Markdown<'a> {
    /// Markdown source text to render.
    pub content: &'a str,
    /// Links used to rewrite link destinations, titles and link text during rendering.
    pub links: &'a [RenderedLink],
    /// Map from which the `id` attribute of every heading is derived.
    pub ids: &'a mut IdMap,
    /// Forwarded to the code-block language-string parser, where it controls whether
    /// `E`-prefixed tokens are treated as error codes.
    pub error_codes: ErrorCodes,
    /// Edition used for code blocks that do not name an edition themselves.
    pub edition: Edition,
    /// Playground settings; with a non-empty URL, Rust code blocks get a "Run code" link.
    pub playground: &'a Option<Playground>,
    /// Offset added to every heading level.
    pub heading_offset: HeadingOffset,
}
/// Rendering input with the same fields as `Markdown` minus the heading offset.
///
/// NOTE(review): the rendering code for this type is not in view here; judging by the name it
/// also produces a table of contents -- confirm against its `impl`.
pub(crate) struct MarkdownWithToc<'a> {
    /// Markdown source text.
    pub(crate) content: &'a str,
    /// Links available for link rewriting.
    pub(crate) links: &'a [RenderedLink],
    /// Map used to derive heading ids.
    pub(crate) ids: &'a mut IdMap,
    /// Whether error-code tags in code-block language strings are recognised.
    pub(crate) error_codes: ErrorCodes,
    /// Default edition for code blocks.
    pub(crate) edition: Edition,
    /// Optional playground settings.
    pub(crate) playground: &'a Option<Playground>,
}
/// A Markdown string (field 0) paired with the `IdMap` (field 1) to use while rendering it.
///
/// NOTE(review): presumably used for item-info snippets; the rendering code is not in view.
pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
/// A Markdown string (field 0) paired with the rendered links (field 1) available to it.
///
/// NOTE(review): presumably rendered as a one-line summary; the rendering code is not in view.
pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
117
/// Two-state switch, convertible to and from `bool`, that tells the language-string parser
/// whether `E`-prefixed tokens may be recognised as error codes.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Maps `true` to [`ErrorCodes::Yes`] and `false` to [`ErrorCodes::No`].
    pub(crate) fn from(b: bool) -> Self {
        if b { Self::Yes } else { Self::No }
    }

    /// Returns `true` exactly for [`ErrorCodes::Yes`].
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, Self::Yes)
    }
}
139
/// One line of a code block, classified by whether it is displayed in the rendered output.
pub(crate) enum Line<'a> {
    /// A line that is kept for compilation but left out of the rendered HTML.
    Hidden(&'a str),
    /// A line that appears in both the rendered HTML and the compiled code.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Text to display, or `None` when the line is hidden.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Text to compile; hidden lines are included.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}

/// Classifies a single code-block line.
///
/// * A line whose trimmed form starts with `##` is shown, with the first `##` replaced by `#`.
/// * A line whose trimmed form starts with `# ` is hidden; the remainder is kept.
/// * A line that is exactly `#` after trimming is hidden and empty.
/// * Everything else (including `#[attr]`-style lines) is shown unchanged.
pub(crate) fn map_line(s: &str) -> Line<'_> {
    let trimmed = s.trim();
    if trimmed.starts_with("##") {
        return Line::Shown(Cow::Owned(s.replacen("##", "#", 1)));
    }
    if let Some(rest) = trimmed.strip_prefix("# ") {
        return Line::Hidden(rest);
    }
    if trimmed == "#" {
        return Line::Hidden("");
    }
    Line::Shown(Cow::Borrowed(s))
}
185
/// Maps one character of heading text to its contribution to the heading id.
///
/// `-`, `_` and alphanumerics are kept (ASCII letters lower-cased, non-ASCII left as is),
/// ASCII whitespace becomes `-`, and every other character is dropped (`None`).
fn slugify(c: char) -> Option<char> {
    match c {
        '-' | '_' => Some(c),
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        _ if c.is_alphanumeric() => Some(c.to_ascii_lowercase()),
        _ if c.is_ascii() && c.is_whitespace() => Some('-'),
        _ => None,
    }
}
198
/// Settings for the "Run code" links attached to rendered Rust code blocks.
#[derive(Debug)]
pub struct Playground {
    /// Crate name handed to the doctest builder when generating the playground source, if any.
    pub crate_name: Option<Symbol>,
    /// Base URL of the playground; no link is generated when it is empty.
    pub url: String,
}
204
205struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
207 inner: I,
208 check_error_codes: ErrorCodes,
209 edition: Edition,
210 playground: &'p Option<Playground>,
213}
214
215impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
216 fn new(
217 iter: I,
218 error_codes: ErrorCodes,
219 edition: Edition,
220 playground: &'p Option<Playground>,
221 ) -> Self {
222 CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
223 }
224}
225
226impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
227 type Item = Event<'a>;
228
229 fn next(&mut self) -> Option<Self::Item> {
230 let event = self.inner.next();
231 let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
232 return event;
233 };
234
235 let mut original_text = String::new();
236 for event in &mut self.inner {
237 match event {
238 Event::End(TagEnd::CodeBlock) => break,
239 Event::Text(ref s) => {
240 original_text.push_str(s);
241 }
242 _ => {}
243 }
244 }
245
246 let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
247 match kind {
248 CodeBlockKind::Fenced(ref lang) => {
249 let parse_result =
250 LangString::parse_without_check(lang, self.check_error_codes);
251 if !parse_result.rust {
252 let added_classes = parse_result.added_classes;
253 let lang_string = if let Some(lang) = parse_result.unknown.first() {
254 format!("language-{}", lang)
255 } else {
256 String::new()
257 };
258 let whitespace = if added_classes.is_empty() { "" } else { " " };
259 return Some(Event::Html(
260 format!(
261 "<div class=\"example-wrap\">\
262 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
263 <code>{text}</code>\
264 </pre>\
265 </div>",
266 added_classes = added_classes.join(" "),
267 text = Escape(
268 original_text.strip_suffix('\n').unwrap_or(&original_text)
269 ),
270 )
271 .into(),
272 ));
273 }
274 parse_result
275 }
276 CodeBlockKind::Indented => Default::default(),
277 };
278
279 let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
280 let text = lines.intersperse("\n".into()).collect::<String>();
281
282 let explicit_edition = edition.is_some();
283 let edition = edition.unwrap_or(self.edition);
284
285 let playground_button = self.playground.as_ref().and_then(|playground| {
286 let krate = &playground.crate_name;
287 let url = &playground.url;
288 if url.is_empty() {
289 return None;
290 }
291 let test = original_text
292 .lines()
293 .map(|l| map_line(l).for_code())
294 .intersperse("\n".into())
295 .collect::<String>();
296 let krate = krate.as_ref().map(|s| s.as_str());
297
298 let opts = GlobalTestOptions {
301 crate_name: krate.map(String::from).unwrap_or_default(),
302 no_crate_inject: false,
303 insert_indent_space: true,
304 args_file: PathBuf::new(),
305 };
306 let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
307 if let Some(krate) = krate {
308 builder = builder.crate_name(krate);
309 }
310 let doctest = builder.build(None);
311 let (wrapped, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
312 let test = wrapped.to_string();
313 let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" };
314
315 let test_escaped = small_url_encode(test);
316 Some(format!(
317 "<a class=\"test-arrow\" \
318 target=\"_blank\" \
319 title=\"Run code\" \
320 href=\"{url}?code={test_escaped}{channel}&edition={edition}\"></a>",
321 ))
322 });
323
324 let tooltip = if ignore == Ignore::All {
325 highlight::Tooltip::IgnoreAll
326 } else if let Ignore::Some(platforms) = ignore {
327 highlight::Tooltip::IgnoreSome(platforms)
328 } else if compile_fail {
329 highlight::Tooltip::CompileFail
330 } else if should_panic {
331 highlight::Tooltip::ShouldPanic
332 } else if explicit_edition {
333 highlight::Tooltip::Edition(edition)
334 } else {
335 highlight::Tooltip::None
336 };
337
338 let mut s = String::new();
341 s.push('\n');
342
343 highlight::render_example_with_highlighting(
344 &text,
345 &mut s,
346 tooltip,
347 playground_button.as_deref(),
348 &added_classes,
349 );
350 Some(Event::Html(s.into()))
351 }
352}
353
/// State shared by the two link-replacing iterator adapters.
struct LinkReplacerInner<'a> {
    /// Links available for rewriting destinations, titles and link text.
    links: &'a [RenderedLink],
    /// The matched link while the events inside a shortcut/collapsed link are being processed;
    /// `None` otherwise.
    shortcut_link: Option<&'a RenderedLink>,
}
359
360struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
361 iter: I,
362 inner: LinkReplacerInner<'a>,
363}
364
365impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
366 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
367 LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
368 }
369}
370
371struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
374 iter: I,
375 inner: LinkReplacerInner<'a>,
376}
377
378impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
379 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
380 SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
381 }
382}
383
384impl<'a> LinkReplacerInner<'a> {
385 fn handle_event(&mut self, event: &mut Event<'a>) {
386 match event {
388 Event::Start(Tag::Link {
391 link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
393 dest_url,
394 title,
395 ..
396 }) => {
397 debug!("saw start of shortcut link to {dest_url} with title {title}");
398 let link = self.links.iter().find(|&link| *link.href == **dest_url);
401 if let Some(link) = link {
404 trace!("it matched");
405 assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
406 self.shortcut_link = Some(link);
407 if title.is_empty() && !link.tooltip.is_empty() {
408 *title = CowStr::Borrowed(link.tooltip.as_ref());
409 }
410 }
411 }
412 Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
414 debug!("saw end of shortcut link");
415 self.shortcut_link = None;
416 }
417 Event::Code(text) => {
420 trace!("saw code {text}");
421 if let Some(link) = self.shortcut_link {
422 if let Some(link) = self.links.iter().find(|l| {
432 l.href == link.href
433 && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
434 }) {
435 debug!("replacing {text} with {new_text}", new_text = link.new_text);
436 *text = CowStr::Borrowed(&link.new_text);
437 }
438 }
439 }
440 Event::Text(text) => {
443 trace!("saw text {text}");
444 if let Some(link) = self.shortcut_link {
445 if let Some(link) = self
447 .links
448 .iter()
449 .find(|l| l.href == link.href && **text == *l.original_text)
450 {
451 debug!("replacing {text} with {new_text}", new_text = link.new_text);
452 *text = CowStr::Borrowed(&link.new_text);
453 }
454 }
455 }
456 Event::Start(Tag::Link { dest_url, title, .. }) => {
459 if let Some(link) =
460 self.links.iter().find(|&link| *link.original_text == **dest_url)
461 {
462 *dest_url = CowStr::Borrowed(link.href.as_ref());
463 if title.is_empty() && !link.tooltip.is_empty() {
464 *title = CowStr::Borrowed(link.tooltip.as_ref());
465 }
466 }
467 }
468 _ => {}
470 }
471 }
472}
473
474impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
475 type Item = Event<'a>;
476
477 fn next(&mut self) -> Option<Self::Item> {
478 let mut event = self.iter.next();
479 if let Some(ref mut event) = event {
480 self.inner.handle_event(event);
481 }
482 event
484 }
485}
486
487impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
488 type Item = SpannedEvent<'a>;
489
490 fn next(&mut self) -> Option<Self::Item> {
491 let (mut event, range) = self.iter.next()?;
492 self.inner.handle_event(&mut event);
493 Some((event, range))
495 }
496}
497
498struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
500 inner: I,
501 stored_events: VecDeque<Event<'a>>,
502}
503
504impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
505 fn new(iter: I) -> Self {
506 Self { inner: iter, stored_events: VecDeque::new() }
507 }
508}
509
510impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
511 type Item = Event<'a>;
512
513 fn next(&mut self) -> Option<Self::Item> {
514 if let Some(first) = self.stored_events.pop_front() {
515 return Some(first);
516 }
517
518 let event = self.inner.next()?;
519
520 Some(match event {
521 Event::Start(Tag::Table(t)) => {
522 self.stored_events.push_back(Event::Start(Tag::Table(t)));
523 Event::Html(CowStr::Borrowed("<div>"))
524 }
525 Event::End(TagEnd::Table) => {
526 self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
527 Event::End(TagEnd::Table)
528 }
529 e => e,
530 })
531 }
532}
533
/// A Markdown event together with a `Range<usize>`, as produced by the parser's offset
/// iterator. Synthesised events in this file use the placeholder range `0..0`.
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
535
536struct HeadingLinks<'a, 'b, 'ids, I> {
538 inner: I,
539 toc: Option<&'b mut TocBuilder>,
540 buf: VecDeque<SpannedEvent<'a>>,
541 id_map: &'ids mut IdMap,
542 heading_offset: HeadingOffset,
543}
544
545impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
546 fn new(
547 iter: I,
548 toc: Option<&'b mut TocBuilder>,
549 ids: &'ids mut IdMap,
550 heading_offset: HeadingOffset,
551 ) -> Self {
552 HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
553 }
554}
555
impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
    type Item = SpannedEvent<'a>;

    /// Yields buffered events first, then upstream events; a heading start is expanded into
    /// an opening `<h*>` tag with anchor, the heading's inner events, and a closing tag.
    fn next(&mut self) -> Option<Self::Item> {
        // Drain the events of a heading that is still being emitted.
        if let Some(e) = self.buf.pop_front() {
            return Some(e);
        }

        let event = self.inner.next();
        if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
            // Buffer everything up to (not including) the heading end, and build the raw id
            // from the slugified characters of all text and inline-code events.
            let mut id = String::new();
            for event in &mut self.inner {
                match &event.0 {
                    Event::End(TagEnd::Heading(_)) => break,
                    Event::Text(text) | Event::Code(text) => {
                        id.extend(text.chars().filter_map(slugify));
                        self.buf.push_back(event);
                    }
                    _ => self.buf.push_back(event),
                }
            }
            let id = self.id_map.derive(id);

            if let Some(ref mut builder) = self.toc {
                // Register the heading with the TOC and prefix the heading content with the
                // value returned by the builder. The TOC gets the unshifted `level`.
                let mut text_header = String::new();
                plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
                let mut html_header = String::new();
                html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
                let sec = builder.push(level as u32, text_header, html_header, id.clone());
                self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
            }

            // Shift the level by the configured offset, capped at `MAX_HEADER_LEVEL`.
            let level =
                std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
            // The closing tag goes last in the buffer, after the heading's content.
            self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));

            let start_tags =
                format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
            return Some((Event::Html(start_tags.into()), 0..0));
        }
        event
    }
}
599
600struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
602 inner: I,
603 started: bool,
604 depth: u32,
605 skipped_tags: u32,
606}
607
608impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
609 fn new(iter: I) -> Self {
610 SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
611 }
612}
613
614fn check_if_allowed_tag(t: &TagEnd) -> bool {
615 matches!(
616 t,
617 TagEnd::Paragraph
618 | TagEnd::Emphasis
619 | TagEnd::Strong
620 | TagEnd::Strikethrough
621 | TagEnd::Link
622 | TagEnd::BlockQuote
623 )
624}
625
626fn is_forbidden_tag(t: &TagEnd) -> bool {
627 matches!(
628 t,
629 TagEnd::CodeBlock
630 | TagEnd::Table
631 | TagEnd::TableHead
632 | TagEnd::TableRow
633 | TagEnd::TableCell
634 | TagEnd::FootnoteDefinition
635 )
636}
637
638impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
639 type Item = Event<'a>;
640
641 fn next(&mut self) -> Option<Self::Item> {
642 if self.started && self.depth == 0 {
643 return None;
644 }
645 if !self.started {
646 self.started = true;
647 }
648 if let Some(event) = self.inner.next() {
649 let mut is_start = true;
650 let is_allowed_tag = match event {
651 Event::Start(ref c) => {
652 if is_forbidden_tag(&c.to_end()) {
653 self.skipped_tags += 1;
654 return None;
655 }
656 self.depth += 1;
657 check_if_allowed_tag(&c.to_end())
658 }
659 Event::End(ref c) => {
660 if is_forbidden_tag(c) {
661 self.skipped_tags += 1;
662 return None;
663 }
664 self.depth -= 1;
665 is_start = false;
666 check_if_allowed_tag(c)
667 }
668 Event::FootnoteReference(_) => {
669 self.skipped_tags += 1;
670 false
671 }
672 _ => true,
673 };
674 if !is_allowed_tag {
675 self.skipped_tags += 1;
676 }
677 return if !is_allowed_tag {
678 if is_start {
679 Some(Event::Start(Tag::Paragraph))
680 } else {
681 Some(Event::End(TagEnd::Paragraph))
682 }
683 } else {
684 Some(event)
685 };
686 }
687 None
688 }
689}
690
/// Line number of a code block counted from the start of the Markdown text it was found in
/// (see how `find_codes` computes the value it passes to `new`).
pub(crate) struct MdRelLine {
    /// Number of lines preceding the code block.
    offset: usize,
}

impl MdRelLine {
    /// Wraps a line offset.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Consumes the wrapper and returns the stored line offset.
    pub(crate) const fn offset(self) -> usize {
        self.offset
    }
}
712
/// Reports the Rust code blocks of `doc` to `tests`.
///
/// Equivalent to calling `find_codes` with `include_non_rust` set to `false`.
pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
    doc: &str,
    tests: &mut T,
    error_codes: ErrorCodes,
    extra_info: Option<&ExtraInfo<'_>>,
) {
    find_codes(doc, tests, error_codes, extra_info, false)
}
721
/// Walks the Markdown in `doc` and reports code blocks and headings to `tests`.
///
/// * `error_codes` and `extra_info` are forwarded to the language-string parser of fenced
///   blocks with a non-empty info string.
/// * `include_non_rust`: when `false`, blocks whose language string is not Rust are skipped.
///
/// For each reported block, `visit_test` receives the block's text (hidden-line markers
/// processed for compilation), its parsed language string, and its line offset within `doc`.
/// For each heading, `visit_header` receives the first text event that follows the heading
/// start, along with the heading level.
pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
    doc: &str,
    tests: &mut T,
    error_codes: ErrorCodes,
    extra_info: Option<&ExtraInfo<'_>>,
    include_non_rust: bool,
) {
    let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
    // Start offset of the previously reported code block, and the number of lines before it.
    let mut prev_offset = 0;
    let mut nb_lines = 0;
    // Level of a heading whose text has not been seen yet.
    let mut register_header = None;
    // Not a `for` loop: the code-block arm pulls further events from `parser` itself.
    while let Some((event, offset)) = parser.next() {
        match event {
            Event::Start(Tag::CodeBlock(kind)) => {
                let block_info = match kind {
                    CodeBlockKind::Fenced(ref lang) => {
                        if lang.is_empty() {
                            Default::default()
                        } else {
                            LangString::parse(lang, error_codes, extra_info)
                        }
                    }
                    CodeBlockKind::Indented => Default::default(),
                };
                if !include_non_rust && !block_info.rust {
                    continue;
                }

                let mut test_s = String::new();

                // Consumes the block's text events and also the first non-text event after
                // them.
                while let Some((Event::Text(s), _)) = parser.next() {
                    test_s.push_str(&s);
                }
                let text = test_s
                    .lines()
                    .map(|l| map_line(l).for_code())
                    .collect::<Vec<Cow<'_, str>>>()
                    .join("\n");

                // Count the lines between the previous block and this one. When that slice
                // does not end in a newline, `lines` counts a trailing partial line, which is
                // subtracted again.
                nb_lines += doc[prev_offset..offset.start].lines().count();
                if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
                    nb_lines -= 1;
                }
                let line = MdRelLine::new(nb_lines);
                tests.visit_test(text, block_info, line);
                prev_offset = offset.start;
            }
            Event::Start(Tag::Heading { level, .. }) => {
                register_header = Some(level as u32);
            }
            Event::Text(ref s) if register_header.is_some() => {
                let level = register_header.unwrap();
                tests.visit_header(s, level);
                register_header = None;
            }
            _ => {}
        }
    }
}
784
785pub(crate) struct ExtraInfo<'tcx> {
786 def_id: LocalDefId,
787 sp: Span,
788 tcx: TyCtxt<'tcx>,
789}
790
791impl<'tcx> ExtraInfo<'tcx> {
792 pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
793 ExtraInfo { def_id, sp, tcx }
794 }
795
796 fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
797 self.tcx.node_span_lint(
798 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
799 self.tcx.local_def_id_to_hir_id(self.def_id),
800 self.sp,
801 |lint| {
802 lint.primary_message(msg);
803 },
804 );
805 }
806
807 fn error_invalid_codeblock_attr_with_help(
808 &self,
809 msg: impl Into<DiagMessage>,
810 f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
811 ) {
812 self.tcx.node_span_lint(
813 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
814 self.tcx.local_def_id_to_hir_id(self.def_id),
815 self.sp,
816 |lint| {
817 lint.primary_message(msg);
818 f(lint);
819 },
820 );
821 }
822}
823
/// Parsed form of a code block's language string (the text after the opening fence).
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    /// The unparsed language string.
    pub(crate) original: String,
    /// Set by the `should_panic` token.
    pub(crate) should_panic: bool,
    /// Set by the `no_run` token, and also by `compile_fail`.
    pub(crate) no_run: bool,
    /// Set by `ignore` (all) or by one or more `ignore-<x>` tokens.
    pub(crate) ignore: Ignore,
    /// Whether the block is treated as Rust code.
    pub(crate) rust: bool,
    /// Set by the `test_harness` token.
    pub(crate) test_harness: bool,
    /// Set by the `compile_fail` token.
    pub(crate) compile_fail: bool,
    /// Set by the `standalone_crate` token.
    pub(crate) standalone_crate: bool,
    /// `E` + four-digit tokens, collected when error-code checking is enabled.
    pub(crate) error_codes: Vec<String>,
    /// Edition parsed from an `edition<...>` token, if it was valid.
    pub(crate) edition: Option<Edition>,
    /// Classes from `.class` and `class=value` attributes.
    pub(crate) added_classes: Vec<String>,
    /// Tokens that matched none of the known names, in order of appearance.
    pub(crate) unknown: Vec<String>,
}
839
/// Ignore setting parsed from a code block's language string.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// The plain `ignore` token was present.
    All,
    /// No ignore token was present.
    None,
    /// The suffixes of all `ignore-<suffix>` tokens.
    Some(Vec<String>),
}
846
/// Tokenizer for code-block language strings; yields `LangStringToken`s.
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Cursor over `data`, yielding byte positions and characters.
    inner: Peekable<CharIndices<'a>>,
    /// The full language string; returned tokens are slices of it.
    data: &'a str,
    /// Whether the cursor is currently between `{` and `}`.
    is_in_attribute_block: bool,
    /// Lint context; when `None`, errors are recorded but not reported.
    extra: Option<&'a ExtraInfo<'tcx>>,
    /// Set on the first error; the iterator yields nothing afterwards.
    is_error: bool,
}
893
/// One token of a code-block language string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bare or double-quoted token found outside of `{...}`.
    LangToken(&'a str),
    /// A `.name` entry inside `{...}`; holds `name` without the dot.
    ClassAttribute(&'a str),
    /// A `key=value` entry inside `{...}`; holds key and value without surrounding quotes.
    KeyValueAttribute(&'a str, &'a str),
}
900
/// Returns `true` if `c` may start a bare token: `_`, `-`, `:` or an ASCII letter or digit.
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
/// Returns `true` if `c` may appear inside a bare token: any leading character (`_`, `-`, `:`,
/// ASCII letters and digits) or one of `.!#$%&*+/;<>?@^|~`.
fn is_bareword_char(c: char) -> bool {
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            '_' | '-'
                | ':'
                | '.'
                | '!'
                | '#'
                | '$'
                | '%'
                | '&'
                | '*'
                | '+'
                | '/'
                | ';'
                | '<'
                | '>'
                | '?'
                | '@'
                | '^'
                | '|'
                | '~'
        )
}
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
910
/// Half-open byte range `start..end` into the language string being tokenized.
struct Indices {
    /// Index of the first byte of the slice.
    start: usize,
    /// Index one past the last byte of the slice.
    end: usize,
}
915
impl<'a, 'tcx> TagIterator<'a, 'tcx> {
    /// Creates a tokenizer positioned at the start of `data`, outside any attribute block.
    /// Errors are reported through `extra` when it is `Some`.
    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
        Self {
            inner: data.char_indices().peekable(),
            data,
            is_in_attribute_block: false,
            extra,
            is_error: false,
        }
    }

    /// Reports `err` as a lint when lint context is available, and marks the tokenizer as
    /// failed in either case.
    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
        if let Some(extra) = self.extra {
            extra.error_invalid_codeblock_attr(err);
        }
        self.is_error = true;
    }

    /// Advances past separator characters and returns the position of the next
    /// non-separator character (without consuming it), or `None` at the end of input.
    fn skip_separators(&mut self) -> Option<usize> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_separator(*c) {
                return Some(*pos);
            }
            self.inner.next();
        }
        None
    }

    /// Consumes characters up to and including the closing `"`.
    ///
    /// `start` is the position of the opening quote, which the caller has already consumed.
    /// Returns the range between the quotes, or emits an error and returns `None` when the
    /// input ends first.
    fn parse_string(&mut self, start: usize) -> Option<Indices> {
        for (pos, c) in self.inner.by_ref() {
            if c == '"' {
                return Some(Indices { start: start + 1, end: pos });
            }
        }
        self.emit_error("unclosed quote string `\"`");
        None
    }

    /// Parses a `.class` attribute; `start` is the position of the already-consumed `.`.
    ///
    /// Returns `None` (after emitting an error) when the class name is empty or is followed
    /// by an unexpected character.
    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.peek().copied() {
            if is_bareword_char(c) {
                self.inner.next();
            } else {
                // `start + 1` skips the leading `.`.
                let class = &self.data[start + 1..pos];
                if class.is_empty() {
                    self.emit_error(format!("unexpected `{c}` character after `.`"));
                    return None;
                } else if self.check_after_token() {
                    return Some(LangStringToken::ClassAttribute(class));
                } else {
                    return None;
                }
            }
        }
        // Reached the end of input while still inside the class name.
        let class = &self.data[start + 1..];
        if class.is_empty() {
            self.emit_error("missing character after `.`");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::ClassAttribute(class))
        } else {
            None
        }
    }

    /// Consumes bareword characters and returns the range from `start` up to the first
    /// non-bareword character, which is left unconsumed. Reaching the end of input is an
    /// error.
    fn parse_token(&mut self, start: usize) -> Option<Indices> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_bareword_char(*c) {
                return Some(Indices { start, end: *pos });
            }
            self.inner.next();
        }
        self.emit_error("unexpected end");
        None
    }

    /// Parses `key=value`, where key and value may each be a bare token or a quoted string.
    ///
    /// `c` is the already-consumed first character of the key and `start` its position.
    /// Returns `None` (after emitting an error) for an empty key or value, a missing `=`, an
    /// invalid character after `=`, or an unexpected character after the value.
    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
        let key_indices =
            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
        if key_indices.start == key_indices.end {
            self.emit_error("unexpected empty string as key");
            return None;
        }

        // The character directly after the key must be `=`.
        if let Some((_, c)) = self.inner.next() {
            if c != '=' {
                self.emit_error(format!("expected `=`, found `{}`", c));
                return None;
            }
        } else {
            self.emit_error("unexpected end");
            return None;
        }
        let value_indices = match self.inner.next() {
            Some((pos, '"')) => self.parse_string(pos)?,
            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
            Some((_, c)) => {
                self.emit_error(format!("unexpected `{c}` character after `=`"));
                return None;
            }
            None => {
                self.emit_error("expected value after `=`");
                return None;
            }
        };
        if value_indices.start == value_indices.end {
            self.emit_error("unexpected empty string as value");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::KeyValueAttribute(
                &self.data[key_indices.start..key_indices.end],
                &self.data[value_indices.start..value_indices.end],
            ))
        } else {
            None
        }
    }

    /// Checks, without consuming anything, that the next character may follow an attribute:
    /// `}`, a separator or `(`. Any other character is an error. The end of input is accepted
    /// here.
    fn check_after_token(&mut self) -> bool {
        if let Some((_, c)) = self.inner.peek().copied() {
            if c == '}' || is_separator(c) || c == '(' {
                true
            } else {
                self.emit_error(format!("unexpected `{c}` character"));
                false
            }
        } else {
            true
        }
    }

    /// Parses the next item inside `{...}`: the closing `}` (after which tokenizing continues
    /// outside the block), a `.class` attribute, or a `key=value` attribute.
    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
        if let Some((pos, c)) = self.inner.next() {
            if c == '}' {
                self.is_in_attribute_block = false;
                return self.next();
            } else if c == '.' {
                return self.parse_class(pos);
            } else if c == '"' || is_leading_char(c) {
                return self.parse_key_value(c, pos);
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
        None
    }

    /// Consumes characters up to and including the next `)`; the opening `(` has already been
    /// consumed by the caller. Returns `false` (after emitting an error) when no `)` is found.
    fn skip_paren_block(&mut self) -> bool {
        for (_, c) in self.inner.by_ref() {
            if c == ')' {
                return true;
            }
        }
        self.emit_error("unclosed comment: missing `)` at the end");
        false
    }

    /// Parses the next token outside of `{...}`; `start` is the position of the first
    /// (unconsumed) non-separator character.
    ///
    /// Yields a quoted or bare `LangToken`; enters the attribute block on `{`; skips
    /// parenthesised blocks.
    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.next() {
            if c == '"' {
                // A quote is only valid as the very first character of a token.
                if pos != start {
                    self.emit_error("expected ` `, `{` or `,` found `\"`");
                    return None;
                }
                let indices = self.parse_string(pos)?;
                if let Some((_, c)) = self.inner.peek().copied()
                    && c != '{'
                    && !is_separator(c)
                    && c != '('
                {
                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
                    return None;
                }
                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
            } else if c == '{' {
                // NOTE(review): characters accumulated in `start..pos` before the `{` are not
                // returned as a token on this path -- confirm that this is intended.
                self.is_in_attribute_block = true;
                return self.next();
            } else if is_separator(c) {
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if c == '(' {
                if !self.skip_paren_block() {
                    return None;
                }
                // The token, if any, ends right before the `(`.
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
            {
                continue;
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        // End of input: whatever remains from `start` is the final token.
        let token = &self.data[start..];
        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
    }
}
1123
1124impl<'a> Iterator for TagIterator<'a, '_> {
1125 type Item = LangStringToken<'a>;
1126
1127 fn next(&mut self) -> Option<Self::Item> {
1128 if self.is_error {
1129 return None;
1130 }
1131 let Some(start) = self.skip_separators() else {
1132 if self.is_in_attribute_block {
1133 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1134 }
1135 return None;
1136 };
1137 if self.is_in_attribute_block {
1138 self.parse_in_attribute_block()
1139 } else {
1140 self.parse_outside_attribute_block(start)
1141 }
1142 }
1143}
1144
1145impl Default for LangString {
1146 fn default() -> Self {
1147 Self {
1148 original: String::new(),
1149 should_panic: false,
1150 no_run: false,
1151 ignore: Ignore::None,
1152 rust: true,
1153 test_harness: false,
1154 compile_fail: false,
1155 standalone_crate: false,
1156 error_codes: Vec::new(),
1157 edition: None,
1158 added_classes: Vec::new(),
1159 unknown: Vec::new(),
1160 }
1161 }
1162}
1163
1164impl LangString {
1165 fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1166 Self::parse(string, allow_error_code_check, None)
1167 }
1168
1169 fn parse(
1170 string: &str,
1171 allow_error_code_check: ErrorCodes,
1172 extra: Option<&ExtraInfo<'_>>,
1173 ) -> Self {
1174 let allow_error_code_check = allow_error_code_check.as_bool();
1175 let mut seen_rust_tags = false;
1176 let mut seen_other_tags = false;
1177 let mut seen_custom_tag = false;
1178 let mut data = LangString::default();
1179 let mut ignores = vec![];
1180
1181 data.original = string.to_owned();
1182
1183 let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1184 for token in tokens {
1185 match token {
1186 LangStringToken::LangToken("should_panic") => {
1187 data.should_panic = true;
1188 seen_rust_tags = !seen_other_tags;
1189 }
1190 LangStringToken::LangToken("no_run") => {
1191 data.no_run = true;
1192 seen_rust_tags = !seen_other_tags;
1193 }
1194 LangStringToken::LangToken("ignore") => {
1195 data.ignore = Ignore::All;
1196 seen_rust_tags = !seen_other_tags;
1197 }
1198 LangStringToken::LangToken(x)
1199 if let Some(ignore) = x.strip_prefix("ignore-") =>
1200 {
1201 ignores.push(ignore.to_owned());
1202 seen_rust_tags = !seen_other_tags;
1203 }
1204 LangStringToken::LangToken("rust") => {
1205 data.rust = true;
1206 seen_rust_tags = true;
1207 }
1208 LangStringToken::LangToken("custom") => {
1209 seen_custom_tag = true;
1210 }
1211 LangStringToken::LangToken("test_harness") => {
1212 data.test_harness = true;
1213 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1214 }
1215 LangStringToken::LangToken("compile_fail") => {
1216 data.compile_fail = true;
1217 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1218 data.no_run = true;
1219 }
1220 LangStringToken::LangToken("standalone_crate") => {
1221 data.standalone_crate = true;
1222 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1223 }
1224 LangStringToken::LangToken(x)
1225 if let Some(edition) = x.strip_prefix("edition") =>
1226 {
1227 data.edition = edition.parse::<Edition>().ok();
1228 }
1229 LangStringToken::LangToken(x)
1230 if let Some(edition) = x.strip_prefix("rust")
1231 && edition.parse::<Edition>().is_ok()
1232 && let Some(extra) = extra =>
1233 {
1234 extra.error_invalid_codeblock_attr_with_help(
1235 format!("unknown attribute `{x}`"),
1236 |lint| {
1237 lint.help(format!(
1238 "there is an attribute with a similar name: `edition{edition}`"
1239 ));
1240 },
1241 );
1242 }
1243 LangStringToken::LangToken(x)
1244 if allow_error_code_check
1245 && let Some(error_code) = x.strip_prefix('E')
1246 && error_code.len() == 4 =>
1247 {
1248 if error_code.parse::<u32>().is_ok() {
1249 data.error_codes.push(x.to_owned());
1250 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1251 } else {
1252 seen_other_tags = true;
1253 }
1254 }
1255 LangStringToken::LangToken(x) if let Some(extra) = extra => {
1256 if let Some(help) = match x.to_lowercase().as_str() {
1257 "compile-fail" | "compile_fail" | "compilefail" => Some(
1258 "use `compile_fail` to invert the results of this test, so that it \
1259 passes if it cannot be compiled and fails if it can",
1260 ),
1261 "should-panic" | "should_panic" | "shouldpanic" => Some(
1262 "use `should_panic` to invert the results of this test, so that if \
1263 passes if it panics and fails if it does not",
1264 ),
1265 "no-run" | "no_run" | "norun" => Some(
1266 "use `no_run` to compile, but not run, the code sample during \
1267 testing",
1268 ),
1269 "test-harness" | "test_harness" | "testharness" => Some(
1270 "use `test_harness` to run functions marked `#[test]` instead of a \
1271 potentially-implicit `main` function",
1272 ),
1273 "standalone" | "standalone_crate" | "standalone-crate"
1274 if extra.sp.at_least_rust_2024() =>
1275 {
1276 Some(
1277 "use `standalone_crate` to compile this code block \
1278 separately",
1279 )
1280 }
1281 _ => None,
1282 } {
1283 extra.error_invalid_codeblock_attr_with_help(
1284 format!("unknown attribute `{x}`"),
1285 |lint| {
1286 lint.help(help).help(
1287 "this code block may be skipped during testing, \
1288 because unknown attributes are treated as markers for \
1289 code samples written in other programming languages, \
1290 unless it is also explicitly marked as `rust`",
1291 );
1292 },
1293 );
1294 }
1295 seen_other_tags = true;
1296 data.unknown.push(x.to_owned());
1297 }
1298 LangStringToken::LangToken(x) => {
1299 seen_other_tags = true;
1300 data.unknown.push(x.to_owned());
1301 }
1302 LangStringToken::KeyValueAttribute("class", value) => {
1303 data.added_classes.push(value.to_owned());
1304 }
1305 LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1306 extra
1307 .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1308 }
1309 LangStringToken::ClassAttribute(class) => {
1310 data.added_classes.push(class.to_owned());
1311 }
1312 _ => {}
1313 }
1314 }
1315 };
1316
1317 let mut tag_iter = TagIterator::new(string, extra);
1318 call(&mut tag_iter);
1319
1320 if !ignores.is_empty() {
1322 data.ignore = Ignore::Some(ignores);
1323 }
1324
1325 data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1326
1327 data
1328 }
1329}
1330
1331impl<'a> Markdown<'a> {
1332 pub fn write_into(self, f: impl fmt::Write) -> fmt::Result {
1333 if self.content.is_empty() {
1335 return Ok(());
1336 }
1337
1338 html::write_html_fmt(f, self.into_iter())
1339 }
1340
1341 fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1342 let Markdown {
1343 content: md,
1344 links,
1345 ids,
1346 error_codes: codes,
1347 edition,
1348 playground,
1349 heading_offset,
1350 } = self;
1351
1352 let replacer = move |broken_link: BrokenLink<'_>| {
1353 links
1354 .iter()
1355 .find(|link| *link.original_text == *broken_link.reference)
1356 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1357 };
1358
1359 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1360 let p = p.into_offset_iter();
1361
1362 ids.handle_footnotes(|ids, existing_footnotes| {
1363 let p = HeadingLinks::new(p, None, ids, heading_offset);
1364 let p = SpannedLinkReplacer::new(p, links);
1365 let p = footnotes::Footnotes::new(p, existing_footnotes);
1366 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1367 CodeBlocks::new(p, codes, edition, playground)
1368 })
1369 }
1370
1371 pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1377 if self.content.is_empty() {
1378 return (None, None);
1379 }
1380 let mut p = self.into_iter();
1381
1382 let mut event_level = 0;
1383 let mut summary_events = Vec::new();
1384 let mut get_next_tag = false;
1385
1386 let mut end_of_summary = false;
1387 while let Some(event) = p.next() {
1388 match event {
1389 Event::Start(_) => event_level += 1,
1390 Event::End(kind) => {
1391 event_level -= 1;
1392 if event_level == 0 {
1393 end_of_summary = true;
1395 get_next_tag = kind == TagEnd::Table;
1397 }
1398 }
1399 _ => {}
1400 }
1401 summary_events.push(event);
1402 if end_of_summary {
1403 if get_next_tag && let Some(event) = p.next() {
1404 summary_events.push(event);
1405 }
1406 break;
1407 }
1408 }
1409 let mut summary = String::new();
1410 html::push_html(&mut summary, summary_events.into_iter());
1411 if summary.is_empty() {
1412 return (None, None);
1413 }
1414 let mut content = String::new();
1415 html::push_html(&mut content, p);
1416
1417 if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1418 }
1419}
1420
1421impl MarkdownWithToc<'_> {
1422 pub(crate) fn into_parts(self) -> (Toc, String) {
1423 let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1424 self;
1425
1426 if md.is_empty() {
1428 return (Toc { entries: Vec::new() }, String::new());
1429 }
1430 let mut replacer = |broken_link: BrokenLink<'_>| {
1431 links
1432 .iter()
1433 .find(|link| *link.original_text == *broken_link.reference)
1434 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1435 };
1436
1437 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1438 let p = p.into_offset_iter();
1439
1440 let mut s = String::with_capacity(md.len() * 3 / 2);
1441
1442 let mut toc = TocBuilder::new();
1443
1444 ids.handle_footnotes(|ids, existing_footnotes| {
1445 let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1446 let p = footnotes::Footnotes::new(p, existing_footnotes);
1447 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1448 let p = CodeBlocks::new(p, codes, edition, playground);
1449 html::push_html(&mut s, p);
1450 });
1451
1452 (toc.into_toc(), s)
1453 }
1454
1455 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1456 let (toc, s) = self.into_parts();
1457 write!(f, "<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1458 }
1459}
1460
1461impl MarkdownItemInfo<'_> {
1462 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1463 let MarkdownItemInfo(md, ids) = self;
1464
1465 if md.is_empty() {
1467 return Ok(());
1468 }
1469 let p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1470
1471 let p = p.map(|event| match event.0 {
1473 Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1474 _ => event,
1475 });
1476
1477 ids.handle_footnotes(|ids, existing_footnotes| {
1478 let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1479 let p = footnotes::Footnotes::new(p, existing_footnotes);
1480 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1481 let p = p.filter(|event| {
1482 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1483 });
1484 html::write_html_fmt(&mut f, p)
1485 })
1486 }
1487}
1488
1489impl MarkdownSummaryLine<'_> {
1490 pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1491 let MarkdownSummaryLine(md, links) = self;
1492 if md.is_empty() {
1494 return (String::new(), false);
1495 }
1496
1497 let mut replacer = |broken_link: BrokenLink<'_>| {
1498 links
1499 .iter()
1500 .find(|link| *link.original_text == *broken_link.reference)
1501 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1502 };
1503
1504 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1505 .peekable();
1506 let mut summary = SummaryLine::new(p);
1507
1508 let mut s = String::new();
1509
1510 let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1511 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1512 });
1513
1514 html::push_html(&mut s, without_paragraphs);
1515
1516 let has_more_content =
1517 matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1518
1519 (s, has_more_content)
1520 }
1521
1522 pub(crate) fn into_string(self) -> String {
1523 self.into_string_with_has_more_content().0
1524 }
1525}
1526
1527fn markdown_summary_with_limit(
1536 md: &str,
1537 link_names: &[RenderedLink],
1538 length_limit: usize,
1539) -> (String, bool) {
1540 if md.is_empty() {
1541 return (String::new(), false);
1542 }
1543
1544 let mut replacer = |broken_link: BrokenLink<'_>| {
1545 link_names
1546 .iter()
1547 .find(|link| *link.original_text == *broken_link.reference)
1548 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1549 };
1550
1551 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1552 let mut p = LinkReplacer::new(p, link_names);
1553
1554 let mut buf = HtmlWithLimit::new(length_limit);
1555 let mut stopped_early = false;
1556 let _ = p.try_for_each(|event| {
1557 match &event {
1558 Event::Text(text) => {
1559 let r =
1560 text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1561 if r.is_break() {
1562 stopped_early = true;
1563 }
1564 return r;
1565 }
1566 Event::Code(code) => {
1567 buf.open_tag("code");
1568 let r = buf.push(code);
1569 if r.is_break() {
1570 stopped_early = true;
1571 } else {
1572 buf.close_tag();
1573 }
1574 return r;
1575 }
1576 Event::Start(tag) => match tag {
1577 Tag::Emphasis => buf.open_tag("em"),
1578 Tag::Strong => buf.open_tag("strong"),
1579 Tag::CodeBlock(..) => return ControlFlow::Break(()),
1580 _ => {}
1581 },
1582 Event::End(tag) => match tag {
1583 TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1584 TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1585 _ => {}
1586 },
1587 Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1588 _ => {}
1589 };
1590 ControlFlow::Continue(())
1591 });
1592
1593 (buf.finish(), stopped_early)
1594}
1595
1596pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1603 let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1604
1605 if was_shortened {
1606 s.push('…');
1607 }
1608
1609 s
1610}
1611
1612pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1619 if md.is_empty() {
1620 return String::new();
1621 }
1622
1623 let mut s = String::with_capacity(md.len() * 3 / 2);
1624
1625 let mut replacer = |broken_link: BrokenLink<'_>| {
1626 link_names
1627 .iter()
1628 .find(|link| *link.original_text == *broken_link.reference)
1629 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1630 };
1631
1632 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1633
1634 plain_text_from_events(p, &mut s);
1635
1636 s
1637}
1638
1639pub(crate) fn plain_text_from_events<'a>(
1640 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1641 s: &mut String,
1642) {
1643 for event in events {
1644 match &event {
1645 Event::Text(text) => s.push_str(text),
1646 Event::Code(code) => {
1647 s.push('`');
1648 s.push_str(code);
1649 s.push('`');
1650 }
1651 Event::HardBreak | Event::SoftBreak => s.push(' '),
1652 Event::Start(Tag::CodeBlock(..)) => break,
1653 Event::End(TagEnd::Paragraph) => break,
1654 Event::End(TagEnd::Heading(..)) => break,
1655 _ => (),
1656 }
1657 }
1658}
1659
1660pub(crate) fn html_text_from_events<'a>(
1661 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1662 s: &mut String,
1663) {
1664 for event in events {
1665 match &event {
1666 Event::Text(text) => {
1667 write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1668 }
1669 Event::Code(code) => {
1670 s.push_str("<code>");
1671 write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1672 s.push_str("</code>");
1673 }
1674 Event::HardBreak | Event::SoftBreak => s.push(' '),
1675 Event::Start(Tag::CodeBlock(..)) => break,
1676 Event::End(TagEnd::Paragraph) => break,
1677 Event::End(TagEnd::Heading(..)) => break,
1678 _ => (),
1679 }
1680 }
1681}
1682
/// A link found in a markdown document by `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The kind of link syntax, as reported by the markdown parser.
    pub kind: LinkType,
    /// The link destination as an owned string.
    pub link: String,
    /// Where the link is located in the markdown source.
    pub range: MarkdownLinkRange,
}
1689
/// Byte range of a link inside the markdown source.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// Range covering only the link destination.
    Destination(Range<usize>),
    /// Range covering the whole link; used when the destination alone could not be
    /// located.
    WholeLink(Range<usize>),
}

impl MarkdownLinkRange {
    /// Returns the wrapped range, regardless of which variant holds it.
    pub fn inner_range(&self) -> &Range<usize> {
        let (Self::Destination(range) | Self::WholeLink(range)) = self;
        range
    }
}
1709
1710pub(crate) fn markdown_links<'md, R>(
1711 md: &'md str,
1712 preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1713) -> Vec<R> {
1714 use itertools::Itertools;
1715 if md.is_empty() {
1716 return vec![];
1717 }
1718
1719 let locate = |s: &str, fallback: Range<usize>| unsafe {
1721 let s_start = s.as_ptr();
1722 let s_end = s_start.add(s.len());
1723 let md_start = md.as_ptr();
1724 let md_end = md_start.add(md.len());
1725 if md_start <= s_start && s_end <= md_end {
1726 let start = s_start.offset_from(md_start) as usize;
1727 let end = s_end.offset_from(md_start) as usize;
1728 MarkdownLinkRange::Destination(start..end)
1729 } else {
1730 MarkdownLinkRange::WholeLink(fallback)
1731 }
1732 };
1733
1734 let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1735 match link {
1740 CowStr::Borrowed(s) => locate(s, span),
1745
1746 CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1748 }
1749 };
1750
1751 let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1752 let mut square_brace_count = 0;
1755 let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1756 for (_i, c) in &mut iter {
1757 match c {
1758 b':' if square_brace_count == 0 => break,
1759 b'[' => square_brace_count += 1,
1760 b']' => square_brace_count -= 1,
1761 _ => {}
1762 }
1763 }
1764 while let Some((i, c)) = iter.next() {
1765 if c == b'<' {
1766 while let Some((j, c)) = iter.next() {
1767 match c {
1768 b'\\' => {
1769 let _ = iter.next();
1770 }
1771 b'>' => {
1772 return MarkdownLinkRange::Destination(
1773 i + 1 + span.start..j + span.start,
1774 );
1775 }
1776 _ => {}
1777 }
1778 }
1779 } else if !c.is_ascii_whitespace() {
1780 for (j, c) in iter.by_ref() {
1781 if c.is_ascii_whitespace() {
1782 return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1783 }
1784 }
1785 return MarkdownLinkRange::Destination(i + span.start..span.end);
1786 }
1787 }
1788 span_for_link(link, span)
1789 };
1790
1791 let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1792 let mut open_brace = !0;
1793 let mut close_brace = !0;
1794 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1795 let i = i + span.start;
1796 if b == close {
1797 close_brace = i;
1798 break;
1799 }
1800 }
1801 if close_brace < span.start || close_brace >= span.end {
1802 return MarkdownLinkRange::WholeLink(span);
1803 }
1804 let mut nesting = 1;
1805 for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1806 let i = i + span.start;
1807 if b == close {
1808 nesting += 1;
1809 }
1810 if b == open {
1811 nesting -= 1;
1812 }
1813 if nesting == 0 {
1814 open_brace = i;
1815 break;
1816 }
1817 }
1818 assert!(open_brace != close_brace);
1819 if open_brace < span.start || open_brace >= span.end {
1820 return MarkdownLinkRange::WholeLink(span);
1821 }
1822 let range = (open_brace + 1)..close_brace;
1824 MarkdownLinkRange::Destination(range)
1825 };
1826
1827 let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1828 let mut open_brace = !0;
1829 let mut close_brace = !0;
1830 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1831 let i = i + span.start;
1832 if b == open {
1833 open_brace = i;
1834 break;
1835 }
1836 }
1837 if open_brace < span.start || open_brace >= span.end {
1838 return MarkdownLinkRange::WholeLink(span);
1839 }
1840 let mut nesting = 0;
1841 for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1842 let i = i + open_brace;
1843 if b == close {
1844 nesting -= 1;
1845 }
1846 if b == open {
1847 nesting += 1;
1848 }
1849 if nesting == 0 {
1850 close_brace = i;
1851 break;
1852 }
1853 }
1854 assert!(open_brace != close_brace);
1855 if open_brace < span.start || open_brace >= span.end {
1856 return MarkdownLinkRange::WholeLink(span);
1857 }
1858 let range = (open_brace + 1)..close_brace;
1860 MarkdownLinkRange::Destination(range)
1861 };
1862
1863 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1864 let event_iter = Parser::new_with_broken_link_callback(
1865 md,
1866 main_body_opts(),
1867 Some(&mut broken_link_callback),
1868 )
1869 .into_offset_iter();
1870 let mut links = Vec::new();
1871
1872 let mut refdefs = FxIndexMap::default();
1873 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1874 refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1875 }
1876
1877 for (event, span) in event_iter {
1878 match event {
1879 Event::Start(Tag::Link { link_type, dest_url, id, .. })
1880 if may_be_doc_link(link_type) =>
1881 {
1882 let range = match link_type {
1883 LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1885 span_for_offset_backward(span, b'[', b']')
1886 }
1887 LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1888 LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1889 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1891 if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1892 *is_used = true;
1893 span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1894 } else {
1895 span_for_link(&dest_url, span)
1896 }
1897 }
1898 LinkType::Autolink | LinkType::Email => unreachable!(),
1899 };
1900
1901 if let Some(link) = preprocess_link(MarkdownLink {
1902 kind: link_type,
1903 link: dest_url.into_string(),
1904 range,
1905 }) {
1906 links.push(link);
1907 }
1908 }
1909 _ => {}
1910 }
1911 }
1912
1913 for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1914 if !is_used
1915 && let Some(link) = preprocess_link(MarkdownLink {
1916 kind: LinkType::Reference,
1917 range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
1918 link: dest_url,
1919 })
1920 {
1921 links.push(link);
1922 }
1923 }
1924
1925 links
1926}
1927
/// A Rust code block found in a markdown document by `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// Byte range of the whole code block in the markdown source, as reported by the
    /// parser for the block's start event.
    pub(crate) range: Range<usize>,
    /// Byte range of the code itself in the markdown source.
    pub(crate) code: Range<usize>,
    /// Whether the block is a fenced code block (as opposed to an indented one).
    pub(crate) is_fenced: bool,
    /// The parsed info string of the block; the default value for indented blocks and
    /// for fenced blocks with an empty info string.
    pub(crate) lang_string: LangString,
}
1938
1939pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1942 let mut code_blocks = vec![];
1943
1944 if md.is_empty() {
1945 return code_blocks;
1946 }
1947
1948 let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1949
1950 while let Some((event, offset)) = p.next() {
1951 if let Event::Start(Tag::CodeBlock(syntax)) = event {
1952 let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1953 CodeBlockKind::Fenced(syntax) => {
1954 let syntax = syntax.as_ref();
1955 let lang_string = if syntax.is_empty() {
1956 Default::default()
1957 } else {
1958 LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
1959 };
1960 if !lang_string.rust {
1961 continue;
1962 }
1963 let (code_start, mut code_end) = match p.next() {
1964 Some((Event::Text(_), offset)) => (offset.start, offset.end),
1965 Some((_, sub_offset)) => {
1966 let code = Range { start: sub_offset.start, end: sub_offset.start };
1967 code_blocks.push(RustCodeBlock {
1968 is_fenced: true,
1969 range: offset,
1970 code,
1971 lang_string,
1972 });
1973 continue;
1974 }
1975 None => {
1976 let code = Range { start: offset.end, end: offset.end };
1977 code_blocks.push(RustCodeBlock {
1978 is_fenced: true,
1979 range: offset,
1980 code,
1981 lang_string,
1982 });
1983 continue;
1984 }
1985 };
1986 while let Some((Event::Text(_), offset)) = p.next() {
1987 code_end = offset.end;
1988 }
1989 (lang_string, code_start, code_end, offset, true)
1990 }
1991 CodeBlockKind::Indented => {
1992 if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
1995 (
1996 LangString::default(),
1997 offset.start,
1998 offset.end,
1999 Range { start: offset.start, end: offset.end - 1 },
2000 false,
2001 )
2002 } else {
2003 (LangString::default(), offset.start, offset.end, offset, false)
2004 }
2005 }
2006 };
2007
2008 code_blocks.push(RustCodeBlock {
2009 is_fenced,
2010 range,
2011 code: Range { start: code_start, end: code_end },
2012 lang_string,
2013 });
2014 }
2015 }
2016
2017 code_blocks
2018}
2019
/// Hands out unique ids derived from candidate strings (see `IdMap::derive`).
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// Maps an id to the numeric suffix that is appended the next time that id is
    /// requested.
    map: FxHashMap<String, usize>,
    /// Counter that is passed, as a weak reference, to the closure given to
    /// `handle_footnotes`.
    existing_footnotes: Arc<AtomicUsize>,
}
2025
/// Returns `true` if `id` is one of the ids in the fixed, built-in list below.
///
/// The comparison is exact and case-sensitive.
fn is_default_id(id: &str) -> bool {
    const DEFAULT_IDS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    DEFAULT_IDS.contains(&id)
}
2080
2081impl IdMap {
2082 pub fn new() -> Self {
2083 IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2084 }
2085
2086 pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2087 let id = match self.map.get_mut(candidate.as_ref()) {
2088 None => {
2089 let candidate = candidate.to_string();
2090 if is_default_id(&candidate) {
2091 let id = format!("{}-{}", candidate, 1);
2092 self.map.insert(candidate, 2);
2093 id
2094 } else {
2095 candidate
2096 }
2097 }
2098 Some(a) => {
2099 let id = format!("{}-{}", candidate.as_ref(), *a);
2100 *a += 1;
2101 id
2102 }
2103 };
2104
2105 self.map.insert(id.clone(), 1);
2106 id
2107 }
2108
2109 pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2112 &'a mut self,
2113 closure: F,
2114 ) -> T {
2115 let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2116
2117 closure(self, existing_footnotes)
2118 }
2119
2120 pub(crate) fn clear(&mut self) {
2121 self.map.clear();
2122 self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2123 }
2124}