parser: limit maximum number of tokens · graphql-python/graphql-core@0da3225 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0da3225

Browse files
committed
parser: limit maximum number of tokens
Replicates graphql/graphql-js@9df9079
1 parent aab6d50 commit 0da3225

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

src/graphql/language/parser.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
def parse(
8585
source: SourceType,
8686
no_location: bool = False,
87+
max_tokens: Optional[int] = None,
8788
allow_legacy_fragment_variables: bool = False,
8889
experimental_client_controlled_nullability: bool = False,
8990
) -> DocumentNode:
@@ -95,6 +96,12 @@ def parse(
9596
they correspond to. The ``no_location`` option disables that behavior for
9697
performance or testing.
9798
99+
Parser CPU and memory usage is linear to the number of tokens in a document,
100+
however in extreme cases it becomes quadratic due to memory exhaustion.
101+
Parsing happens before validation so even invalid queries can burn lots of
102+
CPU time and memory.
103+
To prevent this you can set a maximum number of tokens allowed within a document.
104+
98105
Legacy feature (will be removed in v3.3):
99106
100107
If ``allow_legacy_fragment_variables`` is set to ``True``, the parser will
@@ -131,6 +138,7 @@ def parse(
131138
parser = Parser(
132139
source,
133140
no_location=no_location,
141+
max_tokens=max_tokens,
134142
allow_legacy_fragment_variables=allow_legacy_fragment_variables,
135143
experimental_client_controlled_nullability=experimental_client_controlled_nullability, # noqa
136144
)
@@ -140,6 +148,7 @@ def parse(
140148
def parse_value(
141149
source: SourceType,
142150
no_location: bool = False,
151+
max_tokens: Optional[int] = None,
143152
allow_legacy_fragment_variables: bool = False,
144153
) -> ValueNode:
145154
"""Parse the AST for a given string containing a GraphQL value.
@@ -155,6 +164,7 @@ def parse_value(
155164
parser = Parser(
156165
source,
157166
no_location=no_location,
167+
max_tokens=max_tokens,
158168
allow_legacy_fragment_variables=allow_legacy_fragment_variables,
159169
)
160170
parser.expect_token(TokenKind.SOF)
@@ -166,6 +176,7 @@ def parse_value(
166176
def parse_const_value(
167177
source: SourceType,
168178
no_location: bool = False,
179+
max_tokens: Optional[int] = None,
169180
allow_legacy_fragment_variables: bool = False,
170181
) -> ConstValueNode:
171182
"""Parse the AST for a given string containing a GraphQL constant value.
@@ -176,6 +187,7 @@ def parse_const_value(
176187
parser = Parser(
177188
source,
178189
no_location=no_location,
190+
max_tokens=max_tokens,
179191
allow_legacy_fragment_variables=allow_legacy_fragment_variables,
180192
)
181193
parser.expect_token(TokenKind.SOF)
@@ -187,6 +199,7 @@ def parse_const_value(
187199
def parse_type(
188200
source: SourceType,
189201
no_location: bool = False,
202+
max_tokens: Optional[int] = None,
190203
allow_legacy_fragment_variables: bool = False,
191204
) -> TypeNode:
192205
"""Parse the AST for a given string containing a GraphQL Type.
@@ -202,6 +215,7 @@ def parse_type(
202215
parser = Parser(
203216
source,
204217
no_location=no_location,
218+
max_tokens=max_tokens,
205219
allow_legacy_fragment_variables=allow_legacy_fragment_variables,
206220
)
207221
parser.expect_token(TokenKind.SOF)
@@ -222,27 +236,32 @@ class Parser:
222236
library, please use the `__version_info__` variable for version detection.
223237
"""
224238

225-
_lexer: Lexer
226239
_no_location: bool
240+
_max_tokens: Optional[int]
227241
_allow_legacy_fragment_variables: bool
228242
_experimental_client_controlled_nullability: bool
243+
_lexer: Lexer
244+
_token_counter: int
229245

230246
def __init__(
231247
self,
232248
source: SourceType,
233249
no_location: bool = False,
250+
max_tokens: Optional[int] = None,
234251
allow_legacy_fragment_variables: bool = False,
235252
experimental_client_controlled_nullability: bool = False,
236253
):
237254
if not is_source(source):
238255
source = Source(cast(str, source))
239256

240-
self._lexer = Lexer(source)
241257
self._no_location = no_location
258+
self._max_tokens = max_tokens
242259
self._allow_legacy_fragment_variables = allow_legacy_fragment_variables
243260
self._experimental_client_controlled_nullability = (
244261
experimental_client_controlled_nullability
245262
)
263+
self._lexer = Lexer(source)
264+
self._token_counter = 0
246265
247266
def parse_name(self) -> NameNode:
248267
"""Convert a name lex token into a name parse node."""
@@ -546,7 +565,7 @@ def parse_value_literal(self, is_const: bool) -> ValueNode:
546565

547566
def parse_string_literal(self, _is_const: bool = False) -> StringValueNode:
548567
token = self._lexer.token
549-
self._lexer.advance()
568+
self.advance_lexer()
550569
return StringValueNode(
551570
value=token.value,
552571
block=token.kind == TokenKind.BLOCK_STRING,
@@ -583,18 +602,18 @@ def parse_object(self, is_const: bool) -> ObjectValueNode:
583602

584603
def parse_int(self, _is_const: bool = False) -> IntValueNode:
585604
token = self._lexer.token
586-
self._lexer.advance()
605+
self.advance_lexer()
587606
return IntValueNode(value=token.value, loc=self.loc(token))
588607

589608
def parse_float(self, _is_const: bool = False) -> FloatValueNode:
590609
token = self._lexer.token
591-
self._lexer.advance()
610+
self.advance_lexer()
592611
return FloatValueNode(value=token.value, loc=self.loc(token))
593612

594613
def parse_named_values(self, _is_const: bool = False) -> ValueNode:
595614
token = self._lexer.token
596615
value = token.value
597-
self._lexer.advance()
616+
self.advance_lexer()
598617
if value == "true":
599618
return BooleanValueNode(value=True, loc=self.loc(token))
600619
if value == "false":
@@ -1089,7 +1108,7 @@ def expect_token(self, kind: TokenKind) -> Token:
10891108
"""
10901109
token = self._lexer.token
10911110
if token.kind == kind:
1092-
self._lexer.advance()
1111+
self.advance_lexer()
10931112
return token
10941113

10951114
raise GraphQLSyntaxError(
@@ -1106,7 +1125,7 @@ def expect_optional_token(self, kind: TokenKind) -> bool:
11061125
"""
11071126
token = self._lexer.token
11081127
if token.kind == kind:
1109-
self._lexer.advance()
1128+
self.advance_lexer()
11101129
return True
11111130

11121131
return False
@@ -1119,7 +1138,7 @@ def expect_keyword(self, value: str) -> None:
11191138
"""
11201139
token = self._lexer.token
11211140
if token.kind == TokenKind.NAME and token.value == value:
1122-
self._lexer.advance()
1141+
self.advance_lexer()
11231142
else:
11241143
raise GraphQLSyntaxError(
11251144
self._lexer.source,
@@ -1135,7 +1154,7 @@ def expect_optional_keyword(self, value: str) -> bool:
11351154
"""
11361155
token = self._lexer.token
11371156
if token.kind == TokenKind.NAME and token.value == value:
1138-
self._lexer.advance()
1157+
self.advance_lexer()
11391158
return True
11401159

11411160
return False
@@ -1223,6 +1242,20 @@ def delimited_many(
12231242
break
12241243
return nodes
12251244

1245+
def advance_lexer(self) -> None:
1246+
"""Advance the lexer."""
1247+
token = self._lexer.advance()
1248+
max_tokens = self._max_tokens
1249+
if max_tokens is not None and token.kind is not TokenKind.EOF:
1250+
self._token_counter += 1
1251+
if self._token_counter > max_tokens:
1252+
raise GraphQLSyntaxError(
1253+
self._lexer.source,
1254+
token.start,
1255+
f"Document contains more that {max_tokens} tokens."
1256+
" Parsing aborted.",
1257+
)
1258+
12261259

12271260
def get_token_desc(token: Token) -> str:
12281261
"""Describe a token as a string for debugging."""

tests/language/test_parser.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,22 @@ def parse_provides_useful_error_when_using_source():
115115
"""
116116
)
117117

118+
def limits_maximum_number_of_tokens():
119+
parse("{ foo }", max_tokens=3)
120+
with raises(
121+
GraphQLSyntaxError,
122+
match="Syntax Error:"
123+
r" Document contains more that 2 tokens\. Parsing aborted\.",
124+
):
125+
parse("{ foo }", max_tokens=2)
126+
parse('{ foo(bar: "baz") }', max_tokens=8)
127+
with raises(
128+
GraphQLSyntaxError,
129+
match="Syntax Error:"
130+
r" Document contains more that 7 tokens\. Parsing aborted\.",
131+
):
132+
parse('{ foo(bar: "baz") }', max_tokens=7)
133+
118134
def parses_variable_inline_values():
119135
parse("{ field(complex: { a: { b: [ $var ] } }) }")
120136

0 commit comments

Comments (0)