Add regex patterns to JSON schema for `Decimal` type by Dima-Bulavenko · Pull Request #11987 · pydantic/pydantic · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions docs/concepts/json_schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,13 @@ print(Model.model_json_schema(mode='validation'))
{
'properties': {
'a': {
'anyOf': [{'type': 'number'}, {'type': 'string'}],
'anyOf': [
{'type': 'number'},
{
'pattern': '^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$',
'type': 'string',
},
],
'default': '12.34',
'title': 'A',
}
Expand All @@ -289,7 +295,14 @@ print(Model.model_json_schema(mode='validation'))
print(Model.model_json_schema(mode='serialization'))
"""
{
'properties': {'a': {'default': '12.34', 'title': 'A', 'type': 'string'}},
'properties': {
'a': {
'default': '12.34',
'pattern': '^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$',
'title': 'A',
'type': 'string',
}
},
'title': 'Model',
'type': 'object',
}
Expand Down
44 changes: 43 additions & 1 deletion pydantic/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,49 @@ def decimal_schema(self, schema: core_schema.DecimalSchema) -> JsonSchemaValue:
Returns:
The generated JSON schema.
"""
json_schema = self.str_schema(core_schema.str_schema())

def get_decimal_pattern(schema: core_schema.DecimalSchema) -> str:
max_digits = schema.get('max_digits')
decimal_places = schema.get('decimal_places')

pattern = (
r'^(?!^[-+.]*$)[+-]?0*' # check it is not empty string and not one or sequence of ".+-" characters.
)

# Case 1: Both max_digits and decimal_places are set
if max_digits is not None and decimal_places is not None:
integer_places = max(0, max_digits - decimal_places)
# Both alternatives share a single trailing `$` placed after the group.
# JSON Schema `pattern` is an unanchored search, so the integer-only
# alternative must be pinned to the end of the string as well; without it
# any input that merely starts with a valid prefix would be accepted.
pattern += (
    rf'(?:'
    rf'\d{{0,{integer_places}}}'  # integer form: at most `integer_places` digits
    rf'|'
    rf'(?=[\d.]{{1,{max_digits + 1}}}0*$)'  # 1..max_digits+1 digit-or-dot chars, then only zeros to the end
    rf'\d{{0,{integer_places}}}\.\d{{0,{decimal_places}}}0*'  # decimal form with bounded places
    rf')$'
)

# Case 2: Only max_digits is set
elif max_digits is not None and decimal_places is None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With max_digits set to e.g. 5, it wrongfully matches '1000.1111111', etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your feedback.

From my testing, the pattern appears to work correctly when max_digits=5 and the invalid value '1000.1111111' is rejected as expected.

Here is a test demonstrating this behavior:

@pytest.fixture
def get_decimal_pattern():
    """Provide a factory returning the regex emitted for a constrained ``Decimal``."""

    def pattern(max_digits=None, decimal_places=None) -> str:
        # Build the JSON schema for a Decimal carrying the requested constraints.
        constrained = Annotated[Decimal, Field(max_digits=max_digits, decimal_places=decimal_places)]
        schema = TypeAdapter(constrained).json_schema()
        # The pattern is read from the second `anyOf` member of the schema.
        string_branch = schema['anyOf'][1]
        return string_branch['pattern']

    return pattern

@pytest.mark.parametrize('invalid_decimal', ['1000.1111111'])
def test_only_max_digits_set(invalid_decimal, get_decimal_pattern):
    """Check that a value exceeding ``max_digits`` is rejected by the emitted pattern.

    ``re.search`` is used instead of ``re.fullmatch`` because JSON Schema
    ``pattern`` is not implicitly anchored: ``fullmatch`` forces an end anchor
    that the schema consumer will not apply, and so can hide an alternative
    in the pattern that lacks its own ``$``.
    """
    pattern = get_decimal_pattern(max_digits=5, decimal_places=None)
    assert re.search(pattern, invalid_decimal) is None

Let me know if there's a specific case I may have missed!

# Both alternatives share a single trailing `$` placed after the group.
# JSON Schema `pattern` is an unanchored search, so the integer-only
# alternative must be pinned to the end of the string as well; without it
# an input such as '1000.1111111' matches through its '1000' prefix.
pattern += (
    rf'(?:'
    rf'\d{{0,{max_digits}}}'  # integer form: at most `max_digits` digits
    rf'|'
    rf'(?=[\d.]{{1,{max_digits + 1}}}0*$)'  # 1..max_digits+1 digit-or-dot chars, then only zeros to the end
    rf'\d*\.\d*0*'  # decimal form; total length is bounded by the lookahead
    rf')$'
)

# Case 3: Only decimal_places is set
elif max_digits is None and decimal_places is not None:
pattern += rf'\d*\.?\d{{0,{decimal_places}}}0*$'

# Case 4: Both are None (no restrictions)
else:
pattern += r'\d*\.?\d*$' # look for arbitrary integer or decimal
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allows '.' as an input, which can't be validated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing this out.
However, r'\d*\.?\d*$' is not the full pattern used for validation.

The complete pattern includes an additional regex component that prevents '.' from being a valid input:

pattern = (
r'^(?!^[-+.]*$)[+-]?0*' # check it is not empty string and not one or sequence of ".+-" characters.
)

Additionally, the edge case for '.' is covered by the following test:

@pytest.mark.parametrize('invalid_decimal', ['.', '-.', '..', '1.1.1', '0.0.0', '1..1', '-', '--'])
def test_decimal_pattern_reject_invalid_with_decimal_places_max_digits_unset(invalid_decimal, get_decimal_pattern):
    """Check that malformed inputs are rejected when neither constraint is set."""
    # Calling the fixture with no arguments leaves max_digits and decimal_places unset.
    pattern = get_decimal_pattern()
    assert re.fullmatch(pattern, invalid_decimal) is None


return pattern

json_schema = self.str_schema(core_schema.str_schema(pattern=get_decimal_pattern(schema)))
if self.mode == 'validation':
multiple_of = schema.get('multiple_of')
le = schema.get('le')
Expand Down
Loading
Loading
0