Strip frontmatter in fewer places by fmease · Pull Request #146340 · rust-lang/rust · GitHub
Merged
Changes from 1 commit
Strip frontmatter in fewer places
fmease committed Sep 9, 2025
commit 7a66925a8138df105f77d646f9206024baf7ea4b
69 changes: 44 additions & 25 deletions compiler/rustc_builtin_macros/src/source_util.rs
@@ -13,9 +13,11 @@ use rustc_expand::base::{
};
use rustc_expand::module::DirOwnership;
use rustc_lint_defs::BuiltinLintDiag;
use rustc_parse::parser::{ForceCollect, Parser};
use rustc_parse::lexer::StripTokens;
use rustc_parse::parser::ForceCollect;
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
use rustc_session::parse::ParseSess;
use rustc_span::source_map::SourceMap;
use rustc_span::{ByteSymbol, Pos, Span, Symbol};
use smallvec::SmallVec;
@@ -114,64 +116,76 @@ pub(crate) fn expand_include<'cx>(
let ExpandResult::Ready(mac) = get_single_str_from_tts(cx, sp, tts, "include!") else {
return ExpandResult::Retry(());
};
let file = match mac {
Ok(file) => file,
let path = match mac {
Ok(path) => path,
Err(guar) => return ExpandResult::Ready(DummyResult::any(sp, guar)),
};
// The file will be added to the code map by the parser
let file = match resolve_path(&cx.sess, file.as_str(), sp) {
Ok(f) => f,
let path = match resolve_path(&cx.sess, path.as_str(), sp) {
Ok(path) => path,
Err(err) => {
let guar = err.emit();
return ExpandResult::Ready(DummyResult::any(sp, guar));
}
};
let p = unwrap_or_emit_fatal(new_parser_from_file(cx.psess(), &file, Some(sp)));

// If in the included file we have e.g., `mod bar;`,
// then the path of `bar.rs` should be relative to the directory of `file`.
// then the path of `bar.rs` should be relative to the directory of `path`.
// See https://github.com/rust-lang/rust/pull/69838/files#r395217057 for a discussion.
// `MacroExpander::fully_expand_fragment` later restores, so "stack discipline" is maintained.
let dir_path = file.parent().unwrap_or(&file).to_owned();
let dir_path = path.parent().unwrap_or(&path).to_owned();
cx.current_expansion.module = Rc::new(cx.current_expansion.module.with_dir_path(dir_path));
cx.current_expansion.dir_ownership = DirOwnership::Owned { relative: None };

struct ExpandInclude<'a> {
p: Parser<'a>,
psess: &'a ParseSess,
path: PathBuf,
node_id: ast::NodeId,
span: Span,
}
impl<'a> MacResult for ExpandInclude<'a> {
fn make_expr(mut self: Box<ExpandInclude<'a>>) -> Option<Box<ast::Expr>> {
let expr = parse_expr(&mut self.p).ok()?;
if self.p.token != token::Eof {
self.p.psess.buffer_lint(
fn make_expr(self: Box<ExpandInclude<'a>>) -> Option<Box<ast::Expr>> {
let mut p = unwrap_or_emit_fatal(new_parser_from_file(
self.psess,
&self.path,
// Don't strip frontmatter for backward compatibility, `---` may be the start of a
// manifold negation. FIXME: Ideally, we wouldn't strip shebangs here either.
StripTokens::Shebang,
Some(self.span),
));
let expr = parse_expr(&mut p).ok()?;
if p.token != token::Eof {
p.psess.buffer_lint(
INCOMPLETE_INCLUDE,
self.p.token.span,
p.token.span,
self.node_id,
BuiltinLintDiag::IncompleteInclude,
);
}
Some(expr)
}

fn make_items(mut self: Box<ExpandInclude<'a>>) -> Option<SmallVec<[Box<ast::Item>; 1]>> {
fn make_items(self: Box<ExpandInclude<'a>>) -> Option<SmallVec<[Box<ast::Item>; 1]>> {
let mut p = unwrap_or_emit_fatal(new_parser_from_file(
self.psess,
&self.path,
StripTokens::ShebangAndFrontmatter,
Some(self.span),
));
let mut ret = SmallVec::new();
loop {
match self.p.parse_item(ForceCollect::No) {
match p.parse_item(ForceCollect::No) {
Err(err) => {
err.emit();
break;
}
Ok(Some(item)) => ret.push(item),
Ok(None) => {
if self.p.token != token::Eof {
self.p
.dcx()
.create_err(errors::ExpectedItem {
span: self.p.token.span,
token: &pprust::token_to_string(&self.p.token),
})
.emit();
if p.token != token::Eof {
p.dcx().emit_err(errors::ExpectedItem {
span: p.token.span,
token: &pprust::token_to_string(&p.token),
});
}

break;
@@ -182,7 +196,12 @@ pub(crate) fn expand_include<'cx>(
}
}

ExpandResult::Ready(Box::new(ExpandInclude { p, node_id: cx.current_expansion.lint_node_id }))
ExpandResult::Ready(Box::new(ExpandInclude {
psess: cx.psess(),
path,
node_id: cx.current_expansion.lint_node_id,
span: sp,
}))
}

/// Expand `include_str!($input)` to the content of the UTF-8-encoded file given by path `$input` as a string literal.
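For context on the comment in make_expr above (expression-position include! only strips the shebang): in expression position a leading --- is just repeated negation, so discarding it as frontmatter would change program meaning. A minimal standalone sketch, not part of this diff (the lint note is an assumption about current rustc behavior):

fn main() {
    // `---1` parses as -(-(-1)); rustc may emit a double-negation style
    // warning, but the expression is legal and unambiguous.
    let x = ---1;
    assert_eq!(x, -1);
    // If expression-position `include!` stripped frontmatter, an included
    // file starting with `---` could silently lose this prefix.
}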
12 changes: 9 additions & 3 deletions compiler/rustc_driver_impl/src/lib.rs
@@ -51,6 +51,7 @@ use rustc_lint::unerased_lint_store;
use rustc_metadata::creader::MetadataLoader;
use rustc_metadata::locator;
use rustc_middle::ty::TyCtxt;
use rustc_parse::lexer::StripTokens;
use rustc_parse::{new_parser_from_file, new_parser_from_source_str, unwrap_or_emit_fatal};
use rustc_session::config::{
CG_OPTIONS, CrateType, ErrorOutputType, Input, OptionDesc, OutFileName, OutputType, Sysroot,
@@ -1288,10 +1289,15 @@ fn warn_on_confusing_output_filename_flag(

fn parse_crate_attrs<'a>(sess: &'a Session) -> PResult<'a, ast::AttrVec> {
let mut parser = unwrap_or_emit_fatal(match &sess.io.input {
Input::File(file) => new_parser_from_file(&sess.psess, file, None),
Input::Str { name, input } => {
new_parser_from_source_str(&sess.psess, name.clone(), input.clone())
Input::File(file) => {
new_parser_from_file(&sess.psess, file, StripTokens::ShebangAndFrontmatter, None)
}
Input::Str { name, input } => new_parser_from_source_str(
&sess.psess,
name.clone(),
input.clone(),
StripTokens::ShebangAndFrontmatter,
),
});
parser.parse_inner_attributes()
}
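To illustrate what StripTokens::ShebangAndFrontmatter buys parse_crate_attrs and crate parsing: a hypothetical script-style crate root (frontmatter is still feature-gated at the time of this PR, so this is a sketch, not a stable example). Both leading blocks are skipped before the inner attributes are read:

#!/usr/bin/env -S cargo -Zscript
---
[dependencies]
---

#![allow(dead_code)]

fn main() {
    // Parsing starts here once the shebang and frontmatter are stripped.
    println!("crate roots may carry a shebang and frontmatter");
}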
9 changes: 7 additions & 2 deletions compiler/rustc_expand/src/module.rs
@@ -4,6 +4,7 @@ use std::path::{self, Path, PathBuf};
use rustc_ast::{AttrVec, Attribute, Inline, Item, ModSpans};
use rustc_attr_parsing::validate_attr;
use rustc_errors::{Diag, ErrorGuaranteed};
use rustc_parse::lexer::StripTokens;
use rustc_parse::{exp, new_parser_from_file, unwrap_or_emit_fatal};
use rustc_session::Session;
use rustc_session::parse::ParseSess;
@@ -67,8 +68,12 @@ pub(crate) fn parse_external_mod(
}

// Actually parse the external file as a module.
let mut parser =
unwrap_or_emit_fatal(new_parser_from_file(&sess.psess, &mp.file_path, Some(span)));
let mut parser = unwrap_or_emit_fatal(new_parser_from_file(
&sess.psess,
&mp.file_path,
StripTokens::ShebangAndFrontmatter,
Some(span),
));
let (inner_attrs, items, inner_span) =
parser.parse_mod(exp!(Eof)).map_err(|err| ModError::ParserError(err))?;
attrs.extend(inner_attrs);
11 changes: 8 additions & 3 deletions compiler/rustc_expand/src/proc_macro_server.rs
@@ -8,7 +8,7 @@ use rustc_ast::util::literal::escape_byte_str_symbol;
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{Diag, ErrorGuaranteed, MultiSpan, PResult};
use rustc_parse::lexer::nfc_normalize;
use rustc_parse::lexer::{StripTokens, nfc_normalize};
use rustc_parse::parser::Parser;
use rustc_parse::{exp, new_parser_from_source_str, source_str_to_stream, unwrap_or_emit_fatal};
use rustc_proc_macro::bridge::{
@@ -485,8 +485,13 @@ impl server::FreeFunctions for Rustc<'_, '_> {

fn literal_from_str(&mut self, s: &str) -> Result<Literal<Self::Span, Self::Symbol>, ()> {
let name = FileName::proc_macro_source_code(s);
let mut parser =
unwrap_or_emit_fatal(new_parser_from_source_str(self.psess(), name, s.to_owned()));

let mut parser = unwrap_or_emit_fatal(new_parser_from_source_str(
self.psess(),
name,
s.to_owned(),
StripTokens::Nothing,
Review comment · fmease (Member, Author) · Sep 9, 2025

Updating this to not strip shebangs isn't breaking.

It already compares spans below to ensure no extra trivia is contained, like whitespace, comments or – well – shebangs. However, it's just cleaner to pass Nothing here.

));

let first_span = parser.token.span.data();
let minus_present = parser.eat(exp!(Minus));
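The span check the author's comment refers to is what makes passing StripTokens::Nothing a pure cleanup: literal_from_str only ever accepts a single literal with no surrounding trivia. A rough sketch of that contract from the outside, using the proc-macro2 crate as a stand-in (an assumption; the in-tree bridge is not directly runnable):

use std::str::FromStr;

fn main() {
    // A lone literal is accepted.
    assert!(proc_macro2::Literal::from_str("1.5e3").is_ok());
    // Anything beyond a single literal token is rejected, so there is never a
    // shebang or frontmatter for the parser to strip in the first place.
    assert!(proc_macro2::Literal::from_str("1 + 1").is_err());
}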
22 changes: 13 additions & 9 deletions compiler/rustc_interface/src/interface.rs
@@ -13,7 +13,8 @@ use rustc_lint::LintStore;
use rustc_middle::ty;
use rustc_middle::ty::CurrentGcx;
use rustc_middle::util::Providers;
use rustc_parse::new_parser_from_simple_source_str;
use rustc_parse::lexer::StripTokens;
use rustc_parse::new_parser_from_source_str;
use rustc_parse::parser::attr::AllowLeadingUnsafe;
use rustc_query_impl::QueryCtxt;
use rustc_query_system::query::print_query_stack;
@@ -68,7 +69,8 @@ pub(crate) fn parse_cfg(dcx: DiagCtxtHandle<'_>, cfgs: Vec<String>) -> Cfg {
};
}

match new_parser_from_simple_source_str(&psess, filename, s.to_string()) {
match new_parser_from_source_str(&psess, filename, s.to_string(), StripTokens::Nothing)
{
Ok(mut parser) => match parser.parse_meta_item(AllowLeadingUnsafe::No) {
Ok(meta_item) if parser.token == token::Eof => {
if meta_item.path.segments.len() != 1 {
@@ -166,13 +168,15 @@ pub(crate) fn parse_check_cfg(dcx: DiagCtxtHandle<'_>, specs: Vec<String>) -> Ch
error!("expected `cfg(name, values(\"value1\", \"value2\", ... \"valueN\"))`")
};

let mut parser = match new_parser_from_simple_source_str(&psess, filename, s.to_string()) {
Ok(parser) => parser,
Err(errs) => {
errs.into_iter().for_each(|err| err.cancel());
expected_error();
}
};
let mut parser =
match new_parser_from_source_str(&psess, filename, s.to_string(), StripTokens::Nothing)
{
Ok(parser) => parser,
Err(errs) => {
errs.into_iter().for_each(|err| err.cancel());
expected_error();
}
};

let meta_item = match parser.parse_meta_item(AllowLeadingUnsafe::No) {
Ok(meta_item) if parser.token == token::Eof => meta_item,
17 changes: 13 additions & 4 deletions compiler/rustc_interface/src/passes.rs
@@ -27,6 +27,7 @@ use rustc_middle::arena::Arena;
use rustc_middle::dep_graph::DepsType;
use rustc_middle::ty::{self, CurrentGcx, GlobalCtxt, RegisteredTools, TyCtxt};
use rustc_middle::util::Providers;
use rustc_parse::lexer::StripTokens;
use rustc_parse::{new_parser_from_file, new_parser_from_source_str, unwrap_or_emit_fatal};
use rustc_passes::{abi_test, input_stats, layout_test};
use rustc_resolve::{Resolver, ResolverOutputs};
@@ -51,10 +52,18 @@ pub fn parse<'a>(sess: &'a Session) -> ast::Crate {
let mut krate = sess
.time("parse_crate", || {
let mut parser = unwrap_or_emit_fatal(match &sess.io.input {
Input::File(file) => new_parser_from_file(&sess.psess, file, None),
Input::Str { input, name } => {
new_parser_from_source_str(&sess.psess, name.clone(), input.clone())
}
Input::File(file) => new_parser_from_file(
&sess.psess,
file,
StripTokens::ShebangAndFrontmatter,
None,
),
Input::Str { input, name } => new_parser_from_source_str(
&sess.psess,
name.clone(),
input.clone(),
StripTokens::ShebangAndFrontmatter,
),
});
parser.parse_crate_mod()
})
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/lexer/mod.rs
@@ -45,7 +45,7 @@ pub(crate) struct UnmatchedDelim {
}

/// Which tokens should be stripped before lexing the tokens.
pub(crate) enum StripTokens {
pub enum StripTokens {
/// Strip both shebang and frontmatter.
ShebangAndFrontmatter,
/// Strip the shebang but not frontmatter.
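The collapsed hunk above hides the remaining variant; for readability, here is a reconstructed sketch of the now-public enum based on the variants used throughout this diff (the doc comment on Nothing is assumed wording):

// Reconstruction for reference; the authoritative definition lives in
// compiler/rustc_parse/src/lexer/mod.rs.
pub enum StripTokens {
    /// Strip both shebang and frontmatter.
    ShebangAndFrontmatter,
    /// Strip the shebang but not frontmatter.
    Shebang,
    /// Strip neither shebang nor frontmatter.
    Nothing,
}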
36 changes: 16 additions & 20 deletions compiler/rustc_parse/src/lib.rs
@@ -54,29 +54,18 @@ pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
}
}

/// Creates a new parser from a source string. On failure, the errors must be consumed via
/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
/// dropped.
pub fn new_parser_from_source_str(
psess: &ParseSess,
name: FileName,
source: String,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
let source_file = psess.source_map().new_source_file(name, source);
new_parser_from_source_file(psess, source_file, StripTokens::ShebangAndFrontmatter)
}

/// Creates a new parser from a simple (no shebang, no frontmatter) source string.
/// Creates a new parser from a source string.
///
/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
/// etc., otherwise a panic will occur when they are dropped.
pub fn new_parser_from_simple_source_str(
Review comment · fmease (Member, Author) · Sep 9, 2025

Removing this convenience wrapper (for StripTokens::Nothing) to force all callers to think about which behavior they want, which is a lot more robust. Otherwise, people will just overlook this function and blindly go for the "normal" variant new_parser_from_source_str.

pub fn new_parser_from_source_str(
psess: &ParseSess,
name: FileName,
source: String,
strip_tokens: StripTokens,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
let source_file = psess.source_map().new_source_file(name, source);
new_parser_from_source_file(psess, source_file, StripTokens::Nothing)
new_parser_from_source_file(psess, source_file, strip_tokens)
}

/// Creates a new parser from a filename. On failure, the errors must be consumed via
@@ -87,6 +76,7 @@ pub fn new_parser_from_simple_source_str(
pub fn new_parser_from_file<'a>(
psess: &'a ParseSess,
path: &Path,
strip_tokens: StripTokens,
sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
let sm = psess.source_map();
@@ -110,7 +100,7 @@
}
err.emit();
});
new_parser_from_source_file(psess, source_file, StripTokens::ShebangAndFrontmatter)
new_parser_from_source_file(psess, source_file, strip_tokens)
}

pub fn utf8_error<E: EmissionGuarantee>(
@@ -172,20 +162,26 @@ fn new_parser_from_source_file(
Ok(parser)
}

/// Given a source string, produces a sequence of token trees.
///
/// NOTE: This only strips shebangs, not frontmatter!
pub fn source_str_to_stream(
psess: &ParseSess,
name: FileName,
source: String,
override_span: Option<Span>,
Review comment · fmease (Member, Author) · Sep 9, 2025

I might add strip_tokens: StripTokens as a parameter to this function in a future PR. At the time of writing it wouldn't really matter: there are some call sites, like the one for -Zcrate-attr, where we could pass StripTokens::Nothing (since the input is wrapped in #![ ] anyway), but it wouldn't lead to any observable difference in rustc's behavior.

) -> Result<TokenStream, Vec<Diag<'_>>> {
let source_file = psess.source_map().new_source_file(name, source);
// used mainly for `proc_macro` and the likes, not for our parsing purposes, so don't parse
// frontmatters as frontmatters, but for compatibility reason still strip the shebang
// FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
// in the current edition since that would be breaking.
// See also <https://github.com/rust-lang/rust/issues/145520>.
// Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
}

/// Given a source file, produces a sequence of token trees. Returns any buffered errors from
/// parsing the token stream.
/// Given a source file, produces a sequence of token trees.
///
/// Returns any buffered errors from parsing the token stream.
fn source_file_to_stream<'psess>(
psess: &'psess ParseSess,
source_file: Arc<SourceFile>,
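To make the removal of new_parser_from_simple_source_str concrete, here is a hypothetical call site written against the signatures shown in this diff; everything except the rustc_parse, rustc_session, and rustc_span items is made up for illustration:

use rustc_parse::lexer::StripTokens;
use rustc_parse::parser::Parser;
use rustc_parse::{new_parser_from_source_str, unwrap_or_emit_fatal};
use rustc_session::parse::ParseSess;
use rustc_span::FileName;

// Hypothetical helper for a small CLI-provided snippet: the caller must now
// state explicitly that neither shebangs nor frontmatter should be stripped,
// instead of silently inheriting ShebangAndFrontmatter.
fn parser_for_flag_value(psess: &ParseSess, src: String) -> Parser<'_> {
    unwrap_or_emit_fatal(new_parser_from_source_str(
        psess,
        FileName::anon_source_code(&src),
        src,
        StripTokens::Nothing,
    ))
}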