[refactoring] Split the 'parser_type' · UnBCIC-TP2/r-python@1481bcd · GitHub
[go: up one dir, main page]

Skip to content

Commit 1481bcd

Browse files
committed
[refactoring] Split the 'parser_type'
1 parent cfcc330 commit 1481bcd

File tree

3 files changed

+275
-1
lines changed

3 files changed

+275
-1
lines changed

src/ir/ast.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,15 @@ pub struct ValueConstructor {
167167
pub types: Vec<Type>,
168168
}
169169

170+
impl ValueConstructor {
171+
pub fn new(name: Name, types: Vec<Type>) -> Self {
172+
ValueConstructor {
173+
name,
174+
types,
175+
}
176+
}
177+
}
178+
170179
#[derive(Debug, PartialEq, Clone)]
171180
pub enum Expression {
172181
/* constants */
@@ -227,6 +236,7 @@ pub enum Statement {
227236
For(Name, Box<Expression>, Box<Statement>),
228237
Block(Vec<Statement>),
229238
Sequence(Box<Statement>, Box<Statement>),
239+
Assert(Box<Expression>, Box<Expression>),
230240
AssertTrue(Box<Expression>, String),
231241
AssertFalse(Box<Expression>, String),
232242
AssertEQ(Box<Expression>, Box<Expression>, String),
@@ -236,7 +246,7 @@ pub enum Statement {
236246
AssertFails(String),
237247
FuncDef(Function),
238248
Return(Box<Expression>),
239-
ADTDeclaration(Name, Vec<ValueConstructor>)
249+
ADTDeclaration(Name, Vec<ValueConstructor>),
240250
}
241251

242252
#[derive(Debug)]

src/parser/parser_common.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
use nom::{
2+
branch::alt,
3+
bytes::complete::{tag, take_while},
4+
character::complete::{alpha1, char, digit1, multispace0},
5+
combinator::{map, map_res, not, opt, peek, recognize, value, verify},
6+
multi::{fold_many0, many0, separated_list0},
7+
sequence::{delimited, pair, preceded, terminated},
8+
IResult,
9+
};
10+
11+
use crate::parser::keywords::KEYWORDS;
12+
13+
pub fn separator<'a>(sep: &'static str) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str> {
14+
delimited(multispace0, tag(sep), multispace0)
15+
}
16+
17+
/// Parses a reserved keyword (e.g., "if") surrounded by optional spaces
18+
/// Fails if followed by an identifier character
19+
pub fn keyword<'a>(kw: &'static str) -> impl FnMut(&'a st A36C r) -> IResult<&'a str, &'a str> {
20+
terminated(
21+
delimited(multispace0, tag(kw), multispace0),
22+
not(peek(identifier_start_or_continue)),
23+
)
24+
}
25+
26+
/// Parsers for identifiers.
27+
pub fn identifier(input: &str) -> IResult<&str, &str> {
28+
let (input, _) = multispace0(input)?;
29+
30+
let (input, first_char) = identifier_start(input)?;
31+
let (input, rest) = identifier_continue(input)?;
32+
33+
let ident = format!("{}{}", first_char, rest);
34+
35+
if KEYWORDS.contains(&ident.as_str()) {
36+
Err(nom::Err::Error(nom::error::Error::new(
37+
input,
38+
nom::error::ErrorKind::Tag,
39+
)))
40+
} else {
41+
Ok((input, Box::leak(ident.into_boxed_str())))
42+
}
43+
}
44+
45+
/// First character of an identifier: [a-zA-Z_]
46+
fn identifier_start(input: &str) -> IResult<&str, &str> {
47+
alt((alpha1, tag("_")))(input)
48+
}
49+
50+
/// Remaining characters: [a-zA-Z0-9_]*
51+
fn identifier_continue(input: &str) -> IResult<&str, &str> {
52+
recognize(many0(identifier_start_or_continue))(input)
53+
}
54+
55+
/// A single identifier character: alphanumeric or underscore
56+
fn identifier_start_or_continue(input: &str) -> IResult<&str, &str> {
57+
recognize(alt((alpha1, tag("_"), nom::character::complete::digit1)))(input)
58+
}
59+
60+
/// Returns `true` for characters allowed inside a string literal: anything
/// other than `"` and control characters (such as `\n` or `\t`).
pub fn is_string_char(c: char) -> bool {
    !(c == '"' || c.is_control())
}

src/parser/parser_type.rs

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
use nom::{
2+
branch::alt,
3+
bytes::complete::{tag, take_while},
4+
character::complete::{alpha1, char, digit1, line_ending, multispace0, space0},
5+
combinator::{map, map_res, not, opt, peek, recognize, value, verify},
6+
multi::{fold_many0, many0, many1, separated_list0, separated_list1},
7+
sequence::{delimited, pair, preceded, terminated, tuple},
8+
IResult,
9+
};
10+
use std::str::FromStr;
11+
12+
use crate::ir::ast::{Type, ValueConstructor};
13+
14+
use crate::parser::parser_common::{keyword, separator, identifier};
15+
16+
pub fn parse_type(input: &str) -> IResult<&str, Type> {
17+
alt(
18+
(parse_basic_types,
19+< 10000 div class="diff-text-inner"> parse_list_type,
20+
parse_tuple_type,
21+
parse_maybe_type,
22+
parse_result_type,
23+
parse_function_type,
24+
parse_adt_type)
25+
)(input)
26+
}
27+
28+
fn parse_basic_types(input: &str) -> IResult<&str, Type> {
29+
map(
30+
alt((keyword("Int"),
31+
keyword("Real"),
32+
keyword("Boolean"),
33+
keyword("String"),
34+
keyword("Unit"),
35+
keyword("Any")
36+
)),
37+
|t| match t {
38+
"Int" => Type::TInteger,
39+
"Real" => Type::TReal,
40+
"Boolean" => Type::TBool,
41+
"String" => Type::TString,
42+
"Unit" => Type::TVoid,
43+
"Any" => Type::TAny,
44+
_ => unreachable!()
45+
}
46+
)(input)
47+
}
48+
49+
fn parse_list_type(input: &str) -> IResult<&str, Type> {
50+
map(tuple(
51+
(preceded(multispace0, char('[')),
52+
preceded(multispace0, parse_type),
53+
preceded(multispace0, char(']')),
54+
)),
55+
|(_, t, _)| Type::TList(Box::new(t))
56+
)(input)
57+
}
58+
59+
fn parse_tuple_type(input: &str) -> IResult<&str, Type> {
60+
map(tuple(
61+
(preceded(multispace0, char('(')),
62+
preceded(multispace0, separated_list1(separator(","), parse_type)),
63+
preceded(multispace0, char(')')),
64+
)),
65+
|(_, ts, _)| Type::TTuple(ts)
66+
)(input)
67+
}
68+
69+
fn parse_maybe_type(input: &str) -> IResult<&str, Type> {
70+
map(tuple(
71+
(preceded(multispace0, keyword("Maybe")),
72+
preceded(multispace0, char('[')),
73+
preceded(multispace0, parse_type),
74+
preceded(multispace0, char(']')),
75+
)),
76+
|(_, _, t, _)| Type::TMaybe(Box::new(t))
77+
)(input)
78+
}
79+
80+
fn parse_result_type(input: &str) -> IResult<&str, Type> {
81+
map(tuple(
82+
(preceded(multispace0, keyword("Result")),
83+
preceded(multispace0, char('[')),
84+
preceded(multispace0, parse_type),
85+
preceded(multispace0, char(',')),
86+
preceded(multispace0, parse_type),
87+
preceded(multispace0, char(']')),
88+
)),
89+
|(_, _, t_ok, _, t_err, _)| Type::TResult(Box::new(t_ok), Box::new(t_err))
90+
)(input)
91+
}
92+
93+
fn parse_function_type(input: &str) -> IResult<&str, Type> {
94+
map(tuple(
95+
(preceded(multispace0, char('(')),
96+
preceded(multispace0, separated_list0(separator(","), parse_type)),
97+
preceded(multispace0, char(')')),
98+
preceded(multispace0, tag("->")),
99+
preceded(multispace0, parse_type),
100+
)),
101+
|(_, t_args, _, _, t_ret)| Type::TFunction(Box::new(Some(t_ret)), t_args)
102+
)(input)
103+
}
104+
105+
fn parse_adt_type(input: &str) -> IResult<&str, Type> {
106+
map(
107+
tuple((
108+
keyword("data"),
109+
preceded(multispace0, identifier),
110+
preceded(multispace0, char(':')),
111+
many1(parse_adt_cons),
112+
preceded(multispace0, keyword("end")),
113+
)),
114+
|(_, name, _, cons, _)| Type::Tadt(name.to_string(), cons),
115+
)(input)
116+
}
117+
118+
fn parse_adt_cons(input: &str) -> IResult<&str, ValueConstructor> {
119+
map(
120+
tuple((
121+
preceded(multispace0, char('|')),
122+
preceded(multispace0, identifier),
123+
separated_list0(multispace0, parse_type),
124+
)),
125+
|(_, name, types)| ValueConstructor::new(name.to_string(), types),
126+
)(input)
127+
}
128+
129+
#[cfg(test)]
mod tests {
    use super::*;

    /// Built-in type names parse to their variants with no input left over.
    #[test]
    fn test_parse_basic_types() {
        let cases = vec![("Int", Type::TInteger), ("Boolean", Type::TBool)];
        for (source, expected) in cases {
            assert_eq!(parse_basic_types(source), Ok(("", expected)));
        }
    }

    /// `[Int]` parses to a list of integers.
    #[test]
    fn test_parse_list_type() {
        let expected = Type::TList(Box::new(Type::TInteger));
        assert_eq!(parse_list_type("[Int]"), Ok(("", expected)));
    }

    /// `(Int, Real)` parses to a two-element tuple.
    #[test]
    fn test_parse_tuple_type() {
        let expected = Type::TTuple(vec![Type::TInteger, Type::TReal]);
        assert_eq!(parse_tuple_type("(Int, Real)"), Ok(("", expected)));
    }

    /// `Maybe [Boolean]` parses to an optional boolean.
    #[test]
    fn test_parse_maybe_type() {
        let expected = Type::TMaybe(Box::new(Type::TBool));
        assert_eq!(parse_maybe_type("Maybe [Boolean]"), Ok(("", expected)));
    }

    /// `Result [Int, String]` parses to a result with ok and error types.
    #[test]
    fn test_parse_result_type() {
        let expected = Type::TResult(Box::new(Type::TInteger), Box::new(Type::TString));
        assert_eq!(parse_result_type("Result [Int, String]"), Ok(("", expected)));
    }

    /// `(Int, Boolean) -> String` parses to a function type.
    #[test]
    fn test_parse_function_type() {
        let return_type = Box::new(Some(Type::TString));
        let arg_types = vec![Type::TInteger, Type::TBool];
        let expected = Type::TFunction(return_type, arg_types);
        assert_eq!(
            parse_function_type("(Int, Boolean) -> String"),
            Ok(("", expected))
        );
    }

    /// A multi-line `data ... end` declaration with two constructors.
    #[test]
    #[ignore]
    fn test_parse_adt_type() {
        let input = "data Maybe:\n | Just Int\n | Nothing\nend";
        let constructors = vec![
            ValueConstructor::new("Just".to_string(), vec![Type::TInteger]),
            ValueConstructor::new("Nothing".to_string(), vec![]),
        ];
        let expected = Type::Tadt("Maybe".to_string(), constructors);
        assert_eq!(parse_adt_type(input), Ok(("", expected)));
    }
}

0 commit comments

Comments
 (0)
0