Add pipeline API by adriangb · Pull Request #9459 · pydantic/pydantic · GitHub
[go: up one dir, main page]

Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
e7eb43c
Add pipeline API
adriangb May 20, 2024
7d78407
port to python 3.10
adriangb May 20, 2024
6a1622d
port to python 3.10
adriangb May 20, 2024
947e920
fix syntax
adriangb May 20, 2024
b3fe1c4
handle slots
adriangb May 20, 2024
a6d56cc
Remove match
adriangb May 21, 2024
42505df
Remove match
adriangb May 21, 2024
b2855d0
ignore warning
adriangb May 21, 2024
aafb856
fix import
adriangb May 21, 2024
0bd8b39
fix union
adriangb May 21, 2024
697833f
fix union
adriangb May 21, 2024
d767731
sort imports
adriangb May 21, 2024
8abb6e4
move
adriangb May 21, 2024
217b11d
move
adriangb May 21, 2024
89c46a1
add missing file
adriangb May 21, 2024
6e91a32
namespace
adriangb May 23, 2024
8e4d535
initial tests
sydney-runkle May 29, 2024
ada5853
add more operators
adriangb May 30, 2024
8742e9e
Add json schema tests, add section mapping existing validators
adriangb May 31, 2024
f55b6e1
move things around for experimental pattern
sydney-runkle May 31, 2024
7132bae
fix docs tests
sydney-runkle May 31, 2024
0444fc9
maybe fix 3.9 test
sydney-runkle May 31, 2024
1a8e505
use typing Pattern
sydney-runkle May 31, 2024
d979841
add PydanticExperimentalWarning
sydney-runkle May 31, 2024
fadf3bb
ignore warnings, for some reason pytestmark wasn't working
sydney-runkle May 31, 2024
1699f35
3.8 friendly removesuffix
sydney-runkle May 31, 2024
d0a9372
Apply docs suggestions from code review
sydney-runkle Jun 4, 2024
bed0752
add __all__
adriangb Jun 4, 2024
eb61549
rename class to pipeline
adriangb Jun 4, 2024
a18a4df
get rid of on_lambda_err
adriangb Jun 4, 2024
34663fe
pr feedback
adriangb Jun 4, 2024
dff9ad9
make transform use the field type instead of any
adriangb Jun 4, 2024
479ab3c
add import
adriangb Jun 4, 2024
7b49219
rename parse() -> validate_as()
adriangb Jun 4, 2024
51bcad6
rename internal classes
adriangb Jun 4, 2024
13b1721
make Pipeline _Pipeline
adriangb Jun 4, 2024
b8573b5
Remove namespaces
adriangb Jun 4, 2024
888c4ed
more test
adriangb Jun 4, 2024
141c8b6
use ellipsis
sydney-runkle Jun 4, 2024
9d4194b
updating imports from internal test
sydney-runkle Jun 4, 2024
128d4ea
maybe fixing zoneinfo tests, switching up validate_as annotation again
sydney-runkle Jun 4, 2024
1c7302d
docs and linting
sydney-runkle Jun 4, 2024
88dcb75
removing tzinfo stuff :(
sydney-runkle Jun 5, 2024
19a3ee6
a bit more explanation
sydney-runkle Jun 5, 2024
0652472
api docs update
sydney-runkle Jun 5, 2024
4ccf4e5
Additional Test Cases for Experimental Pipeline API (#9566)
dAIsySHEng1 Jun 5, 2024
bad0a1a
fix common predicates + add tests
sydney-runkle Jun 5, 2024
a9d1099
remove unneeded line
sydney-runkle Jun 5, 2024
14e9944
update to version policy docs
sydney-runkle Jun 5, 2024
42a2708
skip linting
sydney-runkle Jun 5, 2024
021604f
fix type hint for _Pipeline.then
adriangb Jun 5, 2024
38a2730
Apply suggestions from code review
sydney-runkle Jun 5, 2024
0c36b7c
Update pydantic/experimental/pipeline.py
sydney-runkle Jun 5, 2024
8d46b21
add public todo
sydney-runkle Jun 5, 2024
a46c2e3
move predicate up
sydney-runkle Jun 5, 2024
7386d69
new idea for overload
sydney-runkle Jun 5, 2024
dc07b50
test fixes
sydney-runkle Jun 5, 2024
cbb216b
update test cases with comments
sydney-runkle Jun 5, 2024
581cbe8
no freeze notes
sydney-runkle Jun 5, 2024
c3a008f
suggested frozen change
sydney-runkle Jun 5, 2024
26c5325
add test
adriangb Jun 5, 2024
166df3d
add more assertions
adriangb Jun 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
make Pipeline _Pipeline
  • Loading branch information
adriangb committed Jun 4, 2024
commit 13b172144ff5d18e3bf77a6040f4f42d9a97e4a6
122 changes: 61 additions & 61 deletions pydantic/experimental/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ class _Transform:

@dataclass(**slots_true)
class _PipelineOr:
left: Pipeline[Any, Any]
right: Pipeline[Any, Any]
left: _Pipeline[Any, Any]
right: _Pipeline[Any, Any]


@dataclass(**slots_true)
class _PipelineAnd:
left: Pipeline[Any, Any]
right: Pipeline[Any, Any]
left: _Pipeline[Any, Any]
right: _Pipeline[Any, Any]


_ConstraintAnnotation = Union[
Expand Down Expand Up @@ -88,93 +88,93 @@ class _FieldTypeMarker:


@dataclass(**slots_true)
class Pipeline(Generic[_InT, _OutT]):
class _Pipeline(Generic[_InT, _OutT]):
"""Abstract representation of a chain of validation, transformation, and parsing steps."""

_steps: list[_Step]

def transform(
self,
func: Callable[[_OutT], _NewOutT],
) -> Pipeline[_InT, _NewOutT]:
) -> _Pipeline[_InT, _NewOutT]:
"""Transform the output of the previous step.

If used as the first step in a pipeline, the type of the field is used.
That is, the transformation is applied after the value is parsed to the field's type.
"""
return Pipeline[_InT, _NewOutT](self._steps + [_Transform(func)])
return _Pipeline[_InT, _NewOutT](self._steps + [_Transform(func)])

@overload
def validate_as(self, tp: type[_NewOutT], *, strict: bool = ...) -> Pipeline[_InT, _NewOutT]:
def validate_as(self, tp: type[_NewOutT], *, strict: bool = ...) -> _Pipeline[_InT, _NewOutT]:
...

@overload
def validate_as(self, *, strict: bool = ...) -> Pipeline[_InT, Any]:
def validate_as(self, *, strict: bool = ...) -> _Pipeline[_InT, Any]:
...

def validate_as(self, tp: Any = _FieldTypeMarker, *, strict: bool = False) -> Pipeline[_InT, Any]:
def validate_as(self, tp: Any = _FieldTypeMarker, *, strict: bool = False) -> _Pipeline[_InT, Any]:
"""Validate / parse the input into a new type.

If no type is provided, the type of the field is used.

Types are parsed in Pydantic's `lax` mode by default,
but you can enable `strict` mode by passing `strict=True`.
"""
return Pipeline[_InT, Any](self._steps + [_ValidateAs(tp, strict=strict)])
return _Pipeline[_InT, Any](self._steps + [_ValidateAs(tp, strict=strict)])

def validate_as_deferred(self, func: Callable[[], type[_NewOutT]]) -> Pipeline[_InT, _NewOutT]:
def validate_as_deferred(self, func: Callable[[], type[_NewOutT]]) -> _Pipeline[_InT, _NewOutT]:
"""Parse the input into a new type, deferring resolution of the type until the current class
is fully defined.

This is useful when you need to reference the class in its own type annotations.
"""
return Pipeline[_InT, _NewOutT](self._steps + [_ValidateAsDefer(func)])
return _Pipeline[_InT, _NewOutT](self._steps + [_ValidateAsDefer(func)])

# constraints
@overload
def constrain(self: Pipeline[_InT, _NewOutGe], constraint: annotated_types.Ge) -> Pipeline[_InT, _NewOutGe]:
def constrain(self: _Pipeline[_InT, _NewOutGe], constraint: annotated_types.Ge) -> _Pipeline[_InT, _NewOutGe]:
...

@overload
def constrain(self: Pipeline[_InT, _NewOutGt], constraint: annotated_types.Gt) -> Pipeline[_InT, _NewOutGt]:
def constrain(self: _Pipeline[_InT, _NewOutGt], constraint: annotated_types.Gt) -> _Pipeline[_InT, _NewOutGt]:
...

@overload
def constrain(self: Pipeline[_InT, _NewOutLe], constraint: annotated_types.Le) -> Pipeline[_InT, _NewOutLe]:
def constrain(self: _Pipeline[_InT, _NewOutLe], constraint: annotated_types.Le) -> _Pipeline[_InT, _NewOutLe]:
...

@overload
def constrain(self: Pipeline[_InT, _NewOutLt], constraint: annotated_types.Lt) -> Pipeline[_InT, _NewOutLt]:
def constrain(self: _Pipeline[_InT, _NewOutLt], constraint: annotated_types.Lt) -> _Pipeline[_InT, _NewOutLt]:
...

@overload
def constrain(self: Pipeline[_InT, _NewOutLen], constraint: annotated_types.Len) -> Pipeline[_InT, _NewOutLen]:
def constrain(self: _Pipeline[_InT, _NewOutLen], constraint: annotated_types.Len) -> _Pipeline[_InT, _NewOutLen]:
...

@overload
def constrain(
self: Pipeline[_InT, _NewOutDiv], constraint: annotated_types.MultipleOf
) -> Pipeline[_InT, _NewOutDiv]:
self: _Pipeline[_InT, _NewOutDiv], constraint: annotated_types.MultipleOf
) -> _Pipeline[_InT, _NewOutDiv]:
...

@overload
def constrain(
self: Pipeline[_InT, _NewOutDatetime], constraint: annotated_types.Timezone
) -> Pipeline[_InT, _NewOutDatetime]:
self: _Pipeline[_InT, _NewOutDatetime], constraint: annotated_types.Timezone
) -> _Pipeline[_InT, _NewOutDatetime]:
...

@overload
def constrain(self: Pipeline[_InT, _OutT], constraint: annotated_types.Predicate) -> Pipeline[_InT, _OutT]:
def constrain(self: _Pipeline[_InT, _OutT], constraint: annotated_types.Predicate) -> _Pipeline[_InT, _OutT]:
...

@overload
def constrain(
self: Pipeline[_InT, _NewOutInterval], constraint: annotated_types.Interval
) -> Pipeline[_InT, _NewOutInterval]:
self: _Pipeline[_InT, _NewOutInterval], constraint: annotated_types.Interval
) -> _Pipeline[_InT, _NewOutInterval]:
...

@overload
def constrain(self: Pipeline[_InT, _NewOutT], constraint: Pattern[str]) -> Pipeline[_InT, _NewOutT]:
def constrain(self: _Pipeline[_InT, _NewOutT], constraint: Pattern[str]) -> _Pipeline[_InT, _NewOutT]:
...

def constrain(self, constraint: _ConstraintAnnotation) -> Any:
Expand All @@ -185,72 +185,72 @@ def constrain(self, constraint: _ConstraintAnnotation) -> Any:
Most of the time you'll be calling a shortcut method like `gt`, `lt`, `len`, etc
so you don't need to call this directly.
"""
return Pipeline[_InT, _OutT](self._steps + [_Constraint(constraint)])
return _Pipeline[_InT, _OutT](self._steps + [_Constraint(constraint)])

def gt(self: Pipeline[_InT, _NewOutGt], gt: _NewOutGt) -> Pipeline[_InT, _NewOutGt]:
def gt(self: _Pipeline[_InT, _NewOutGt], gt: _NewOutGt) -> _Pipeline[_InT, _NewOutGt]:
"""Constrain a value to be greater than a certain value."""
return self.constrain(annotated_types.Gt(gt))

def lt(self: Pipeline[_InT, _NewOutLt], lt: _NewOutLt) -> Pipeline[_InT, _NewOutLt]:
def lt(self: _Pipeline[_InT, _NewOutLt], lt: _NewOutLt) -> _Pipeline[_InT, _NewOutLt]:
"""Constrain a value to be less than a certain value."""
return self.constrain(annotated_types.Lt(lt))

def ge(self: Pipeline[_InT, _NewOutGe], ge: _NewOutGe) -> Pipeline[_InT, _NewOutGe]:
def ge(self: _Pipeline[_InT, _NewOutGe], ge: _NewOutGe) -> _Pipeline[_InT, _NewOutGe]:
"""Constrain a value to be greater than or equal to a certain value."""
return self.constrain(annotated_types.Ge(ge))

def le(self: Pipeline[_InT, _NewOutLe], le: _NewOutLe) -> Pipeline[_InT, _NewOutLe]:
def le(self: _Pipeline[_InT, _NewOutLe], le: _NewOutLe) -> _Pipeline[_InT, _NewOutLe]:
"""Constrain a value to be less than or equal to a certain value."""
return self.constrain(annotated_types.Le(le))

def len(self: Pipeline[_InT, _NewOutLen], min_len: int, max_len: int | None = None) -> Pipeline[_InT, _NewOutLen]:
def len(self: _Pipeline[_InT, _NewOutLen], min_len: int, max_len: int | None = None) -> _Pipeline[_InT, _NewOutLen]:
"""Constrain a value to have a certain length."""
return self.constrain(annotated_types.Len(min_len, max_len))

def multiple_of(self: Pipeline[_InT, _NewOutDiv], multiple_of: _NewOutDiv) -> Pipeline[_InT, _NewOutDiv]:
def multiple_of(self: _Pipeline[_InT, _NewOutDiv], multiple_of: _NewOutDiv) -> _Pipeline[_InT, _NewOutDiv]:
"""Constrain a value to be a multiple of a certain number."""
return self.constrain(annotated_types.MultipleOf(multiple_of))

def predicate(self: Pipeline[_InT, _NewOutT], func: Callable[[_NewOutT], bool]) -> Pipeline[_InT, _NewOutT]:
def predicate(self: _Pipeline[_InT, _NewOutT], func: Callable[[_NewOutT], bool]) -> _Pipeline[_InT, _NewOutT]:
"""Constrain a value to meet a certain predicate."""
return self.constrain(annotated_types.Predicate(func))

def not_in(self: Pipeline[_InT, _OutT], values: Container[_OutT]) -> Pipeline[_InT, _OutT]:
def not_in(self: _Pipeline[_InT, _OutT], values: Container[_OutT]) -> _Pipeline[_InT, _OutT]:
"""Constrain a value to not be in a certain set."""
return self.predicate(partial(operator.__contains__, values))

def in_(self: Pipeline[_InT, _OutT], values: Container[_OutT]) -> Pipeline[_InT, _OutT]:
def in_(self: _Pipeline[_InT, _OutT], values: Container[_OutT]) -> _Pipeline[_InT, _OutT]:
"""Constrain a value to be in a certain set."""
return self.predicate(partial(operator.__contains__, values))

def not_eq(self: Pipeline[_InT, _OutT], value: _OutT) -> Pipeline[_InT, _OutT]:
def not_eq(self: _Pipeline[_InT, _OutT], value: _OutT) -> _Pipeline[_InT, _OutT]:
"""Constrain a value to not be equal to a certain value."""
return self.predicate(partial(operator.__ne__, value))

def eq(self: Pipeline[_InT, _OutT], value: _OutT) -> Pipeline[_InT, _OutT]:
def eq(self: _Pipeline[_InT, _OutT], value: _OutT) -> _Pipeline[_InT, _OutT]:
"""Constrain a value to be equal to a certain value."""
return self.predicate(partial(operator.__eq__, value))

# timezone methods
@property
def dt(self: Pipeline[_InT, _NewOutDatetime]) -> _DateTimeValidator:
def dt(self: _Pipeline[_InT, _NewOutDatetime]) -> _DateTimeValidator:
return _DateTimeValidator(self._steps)

# string methods
@property
def str(self: Pipeline[_InT, _NewOutStr]) -> _StringValidator:
def str(self: _Pipeline[_InT, _NewOutStr]) -> _StringValidator:
return _StringValidator(self._steps)

# operators
def otherwise(self, other: Pipeline[_OtherIn, _OtherOut]) -> Pipeline[_InT | _OtherIn, _OutT | _OtherOut]:
def otherwise(self, other: _Pipeline[_OtherIn, _OtherOut]) -> _Pipeline[_InT | _OtherIn, _OutT | _OtherOut]:
"""Combine two validation chains, returning the result of the first chain if it succeeds, and the second chain if it fails."""
return Pipeline([_PipelineOr(self, other)])
return _Pipeline([_PipelineOr(self, other)])

__or__ = otherwise

def then(self, other: Pipeline[_OtherIn, _OtherOut]) -> Pipeline[_InT | _OtherIn, _OutT | _OtherOut]:
def then(self, other: _Pipeline[_OtherIn, _OtherOut]) -> _Pipeline[_InT | _OtherIn, _OutT | _OtherOut]:
"""Pipe the result of one validation chain into another."""
return Pipeline([_PipelineAnd(self, other)])
return _Pipeline([_PipelineAnd(self, other)])

__and__ = then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand why this API was added — as long as the input and output types of a pipeline are the same, then it's basically equivalent to do them in order. However, I'll note that sequencing the items like this may end up being unintuitive, in particular if you expect to get an error for each failure in the case of multiple independent validators, rather than just the first failure. I understand it's hard to "fix" that given that transformations are possible though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I take it that you mean that for validate_as(int).gt(0) & validate_as(int).gt(1) you’d expect -1 to give two errors? Indeed it would only give one. I think that’s reasonable behavior.

I also think you’re saying that for the case where you have a chain of constraints you could error for all of them eg validate_as(int).gt(0).gt(1) and indeed that will happen if they’re all one after another and are known constraints, but not if they’re custom predicates or there’s a transformation between them. And also not when you use &. I think that’s okay.

The one improvement we could make is “collapsing” sequential constraints into one level eg validate_as(int).predicate(lambda x: x > 0).predicate(lambda x: x % 2 == 0) could give both errors despite it being arbitrary user code. That’s a reasonable future feature request that shouldn’t be too hard to implement.

Copy link
Contributor
@dmontagu dmontagu Jun 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, my point was that I might expect validate_as(int).predicate(lambda x: x > 0) & validate_as(int).predicate(lambda x: x % 2 == 0) to produce two errors. (Well, knowing it's doing chaining, I'm not even sure if that's valid as written there since it repeats the validate_as(int), but that's what my intuition would be for how I'd expect to use &.) I think it's reasonable for validate_as(int).predicate(lambda x: x > 0).predicate(lambda x: x % 2 == 0) to just produce one though.

Copy link
Member Author
@adriangb adriangb Jun 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My expectation would be the exact opposite: & is often a greedy operation e.g. False & 1 / 0. So I don't think there's a valid general intuition here. If we could make it behave as you expect I suspect there'd be complaints because it's unintuitive or because it is doing unnecessary work. I don't know why you'd expect validate_as(int).predicate(lambda x: x > 0).predicate(lambda x: x % 2 == 0) to produce just one error. Do you also expect Field(min_length=10, pattern=...) to produce a single error? In any case given that we can't really change the behavior and that no users have complained yet I suspect trying to determine what is most intuitive here is not a productive debate.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm late to the conversation, but I'd expect it to short-circuit the same way it would for an if expression or any other usage (that I know of) for and.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@grantmwilliams I think it works as you'd expect then, right?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adriangb I've only tested it a bit, but it seems to work exactly as expected.


Expand All @@ -269,48 +269,48 @@ def __get_pydantic_core_schema__(self, source_type: Any, handler: GetCoreSchemaH
return s


validate_as = Pipeline[Any, Any]([]).validate_as
parse_defer = Pipeline[Any, Any]([]).validate_as_deferred
transform = Pipeline[Any, Any]([_ValidateAs(_FieldTypeMarker)]).transform
validate_as = _Pipeline[Any, Any]([]).validate_as
parse_defer = _Pipeline[Any, Any]([]).validate_as_deferred
transform = _Pipeline[Any, Any]([_ValidateAs(_FieldTypeMarker)]).transform


class _StringValidator(Pipeline[str, str]):
def lower(self) -> Pipeline[str, str]:
class _StringValidator(_Pipeline[str, str]):
def lower(self) -> _Pipeline[str, str]:
return self.transform(str.lower)

def upper(self) -> Pipeline[str, str]:
def upper(self) -> _Pipeline[str, str]:
return self.transform(str.upper)

def title(self) -> Pipeline[str, str]:
def title(self) -> _Pipeline[str, str]:
return self.transform(str.title)

def strip(self) -> Pipeline[str, str]:
def strip(self) -> _Pipeline[str, str]:
return self.transform(str.strip)

def pattern(self, pattern: str) -> Pipeline[str, str]:
def pattern(self, pattern: str) -> _Pipeline[str, str]:
return self.constrain(re.compile(pattern))

def contains(self, substring: str) -> Pipeline[str, str]:
def contains(self, substring: str) -> _Pipeline[str, str]:
return self.predicate(lambda v: substring in v)

def starts_with(self, prefix: str) -> Pipeline[str, str]:
def starts_with(self, prefix: str) -> _Pipeline[str, str]:
return self.predicate(lambda v: v.startswith(prefix))

def ends_with(self, suffix: str) -> Pipeline[str, str]:
def ends_with(self, suffix: str) -> _Pipeline[str, str]:
return self.predicate(lambda v: v.endswith(suffix))


class _DateTimeValidator(Pipeline[datetime.datetime, datetime.datetime]):
def tz_naive(self) -> Pipeline[datetime.datetime, datetime.datetime]:
class _DateTimeValidator(_Pipeline[datetime.datetime, datetime.datetime]):
def tz_naive(self) -> _Pipeline[datetime.datetime, datetime.datetime]:
return self.constrain(annotated_types.Timezone(None))

def tz_aware(self) -> Pipeline[datetime.datetime, datetime.datetime]:
def tz_aware(self) -> _Pipeline[datetime.datetime, datetime.datetime]:
return self.constrain(annotated_types.Timezone(...))

def tz(self, tz: datetime.tzinfo) -> Pipeline[datetime.datetime, datetime.datetime]:
def tz(self, tz: datetime.tzinfo) -> _Pipeline[datetime.datetime, datetime.datetime]:
return self.constrain(annotated_types.Timezone(tz)) # type: ignore

def with_tz(self, tz: datetime.tzinfo | None) -> Pipeline[datetime.datetime, datetime.datetime]:
def with_tz(self, tz: datetime.tzinfo | None) -> _Pipeline[datetime.datetime, datetime.datetime]:
return self.transform(partial(datetime.datetime.replace, tzinfo=tz))


Expand Down
0