From 68b9d1892dc6007844a80acf5677cf6166bf5533 Mon Sep 17 00:00:00 2001 From: commonism Date: Mon, 5 Jun 2023 16:38:36 +0200 Subject: [PATCH 01/14] Exposing a version str & deprecating the email attribute (#110) * expose version as __version__ * assist ValidatedEmail.email deprecation --- email_validator/__init__.py | 4 ++-- email_validator/exceptions_types.py | 7 +++++++ email_validator/version.py | 1 + setup.cfg | 2 +- tests/test_main.py | 7 +++++++ 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 email_validator/version.py diff --git a/email_validator/__init__.py b/email_validator/__init__.py index d5f26a2..c3b5929 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -4,12 +4,12 @@ from .exceptions_types import ValidatedEmail, EmailNotValidError, \ EmailSyntaxError, EmailUndeliverableError from .validate_email import validate_email - +from .version import __version__ __all__ = ["validate_email", "ValidatedEmail", "EmailNotValidError", "EmailSyntaxError", "EmailUndeliverableError", - "caching_resolver"] + "caching_resolver", "__version__"] def caching_resolver(*args, **kwargs): diff --git a/email_validator/exceptions_types.py b/email_validator/exceptions_types.py index 9a1b331..ee9d50a 100644 --- a/email_validator/exceptions_types.py +++ b/email_validator/exceptions_types.py @@ -78,6 +78,13 @@ def __getattr__(self, key): return self.normalized raise AttributeError() + @property + def email(self): + import warnings + warnings.warn("ValidatedEmail.email is deprecated and will be removed, use ValidatedEmail.normalized instead", DeprecationWarning) + return self.normalized + + """For backwards compatibility, some fields are also exposed through a dict-like interface. Note that some of the names changed when they became attributes.""" def __getitem__(self, key): diff --git a/email_validator/version.py b/email_validator/version.py new file mode 100644 index 0000000..7857319 --- /dev/null +++ b/email_validator/version.py @@ -0,0 +1 @@ +__version__ = "2.0.0.post2" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index d299498..dc97892 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = email_validator -version = 2.0.0.post2 +version = attr: email_validator.version.__version__ description = A robust email address syntax and deliverability validation library. long_description = file: README.md long_description_content_type = text/markdown diff --git a/tests/test_main.py b/tests/test_main.py index e32af94..dc01bc1 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -58,3 +58,10 @@ def test_bytes_input(): input_email = "testaddr中example.tld".encode("utf32") with pytest.raises(EmailSyntaxError): validate_email(input_email, check_deliverability=False) + + +def test_deprecation(): + input_email = b"testaddr@example.tld" + valid_email = validate_email(input_email, check_deliverability=False) + with pytest.raises(DeprecationWarning): + assert valid_email.email is not None \ No newline at end of file From 5abaa7b4ce6677e5a2217db2e52202a760de3c24 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Mon, 5 Jun 2023 10:50:30 -0400 Subject: [PATCH 02/14] Fix tests and add CHANGLOG entry for last commit --- CHANGELOG.md | 6 ++++++ README.md | 2 +- email_validator/exceptions_types.py | 1 - email_validator/version.py | 2 +- tests/test_main.py | 2 +- tests/test_syntax.py | 8 ++++++-- 6 files changed, 15 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48a31c6..d7b02db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +In Development +-------------- + +* The old `email` field on the returned `ValidatedEmail` object, which in the previous version was superseded by `normalized`, will now raise a deprecation warning if used. See https://stackoverflow.com/q/879173 for strategies to suppress the DeprecationWarning. +* A `__version__` module attribute is added. + 2.0.0 (April 15, 2023) ---------------------- diff --git a/README.md b/README.md index 60077ca..72b3b0f 100644 --- a/README.md +++ b/README.md @@ -436,7 +436,7 @@ The package is distributed as a universal wheel and as a source package. To release: * Update CHANGELOG.md. -* Update the version number in setup.cfg. +* Update the version number in `email_validator/version.py`. * Make & push a commit with the new version number and make sure tests pass. * Make & push a tag (see command below). * Make a release at https://github.com/JoshData/python-email-validator/releases/new. diff --git a/email_validator/exceptions_types.py b/email_validator/exceptions_types.py index ee9d50a..4b8f200 100644 --- a/email_validator/exceptions_types.py +++ b/email_validator/exceptions_types.py @@ -84,7 +84,6 @@ def email(self): warnings.warn("ValidatedEmail.email is deprecated and will be removed, use ValidatedEmail.normalized instead", DeprecationWarning) return self.normalized - """For backwards compatibility, some fields are also exposed through a dict-like interface. Note that some of the names changed when they became attributes.""" def __getitem__(self, key): diff --git a/email_validator/version.py b/email_validator/version.py index 7857319..80476c8 100644 --- a/email_validator/version.py +++ b/email_validator/version.py @@ -1 +1 @@ -__version__ = "2.0.0.post2" \ No newline at end of file +__version__ = "2.0.0.post2" diff --git a/tests/test_main.py b/tests/test_main.py index dc01bc1..49a3a77 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -64,4 +64,4 @@ def test_deprecation(): input_email = b"testaddr@example.tld" valid_email = validate_email(input_email, check_deliverability=False) with pytest.raises(DeprecationWarning): - assert valid_email.email is not None \ No newline at end of file + assert valid_email.email is not None diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 57510e5..1c9659c 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -78,8 +78,12 @@ def test_email_valid(email_input, output): assert emailinfo == output assert validate_email(email_input, check_deliverability=False, allow_smtputf8=True) == output - # Check that the old way to access the normalized form still works. - assert emailinfo.email == emailinfo.normalized + # Check that the old `email` attribute to access the normalized form still works + # if the DeprecationWarning is suppressed. + import warnings + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + assert emailinfo.email == emailinfo.normalized @pytest.mark.parametrize( From dbf4618d62d9403eb5bebd00f869abebcda0d0e2 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Tue, 26 Sep 2023 06:39:42 -0400 Subject: [PATCH 03/14] Drop Python 3.7 and update GitHub Actions to build with Python 3.12.0-rc.3 flake8's latest version for Python 3.7 failed to parse f-strings correctly when running on Python 3.12, giving: --ignore=E501,E126,W503 email_validator tests email_validator/syntax.py:30:24: E231 missing whitespace after ':' email_validator/syntax.py:32:24: E231 missing whitespace after ':' ``` By dropping 3.7 we can update flake8 to work on all other versions. See https://github.com/actions/python-versions/releases for the Python versions supported in GitHub Actions. --- .github/workflows/test_and_build.yaml | 2 +- setup.cfg | 2 +- test_requirements.txt | 30 ++++++++++++--------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test_and_build.yaml b/.github/workflows/test_and_build.yaml index e80acd6..441d1ba 100644 --- a/.github/workflows/test_and_build.yaml +++ b/.github/workflows/test_and_build.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-alpha.5"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"] steps: - uses: actions/checkout@v3 diff --git a/setup.cfg b/setup.cfg index dc97892..2394d15 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,11 +14,11 @@ classifiers = Intended Audience :: Developers License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Topic :: Software Development :: Libraries :: Python Modules keywords = email address validator diff --git a/test_requirements.txt b/test_requirements.txt index 5f11247..b41edca 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ # This file was generated by running: -# sudo docker run --rm -it --network=host python:3.7-slim /bin/bash +# sudo docker run --rm -it --network=host python:3.8-slim /bin/bash # pip install dnspython idna # from setup.cfg # pip install pytest pytest-cov coverage flake8 mypy # pip freeze @@ -7,24 +7,20 @@ # the earliest Python version we support, and some exception # messages may depend on package versions, so we pin versions # for reproducible testing.) -attrs==22.2.0 -coverage==7.2.1 -dnspython==2.3.0 -exceptiongroup==1.1.0 -flake8==5.0.4 +coverage==7.3.1 +dnspython==2.4.2 +exceptiongroup==1.1.3 +flake8==6.1.0 idna==3.4 -importlib-metadata==4.2.0 iniconfig==2.0.0 mccabe==0.7.0 -mypy==1.0.1 +mypy==1.5.1 mypy-extensions==1.0.0 -packaging==23.0 -pluggy==1.0.0 -pycodestyle==2.9.1 -pyflakes==2.5.0 -pytest==7.2.1 -pytest-cov==4.0.0 +packaging==23.1 +pluggy==1.3.0 +pycodestyle==2.11.0 +pyflakes==3.1.0 +pytest==7.4.2 +pytest-cov==4.1.0 tomli==2.0.1 -typed-ast==1.5.4 -typing_extensions==4.5.0 -zipp==3.15.0 +typing_extensions==4.8.0 From c52aaa2d17dc28f54608df160457c926b1341f44 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Wed, 18 Oct 2023 09:52:15 -0400 Subject: [PATCH 04/14] Fix incorrect test for DeprecationWarning added in 68b9d1892dc6007844a80acf5677cf6166bf5533 --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index 49a3a77..579163f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -63,5 +63,5 @@ def test_bytes_input(): def test_deprecation(): input_email = b"testaddr@example.tld" valid_email = validate_email(input_email, check_deliverability=False) - with pytest.raises(DeprecationWarning): + with pytest.deprecated_call(): assert valid_email.email is not None From 3aad01998b69a1da25141fdd04f9e60e7d38085a Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Wed, 18 Oct 2023 08:06:28 -0400 Subject: [PATCH 05/14] Update GitHub Actions to build with released version of Python 3.12 --- .github/workflows/test_and_build.yaml | 2 +- CHANGELOG.md | 1 + README.md | 2 +- test_requirements.txt | 8 ++++---- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_and_build.yaml b/.github/workflows/test_and_build.yaml index 441d1ba..5268a2b 100644 --- a/.github/workflows/test_and_build.yaml +++ b/.github/workflows/test_and_build.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12.0"] steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index d7b02db..4961fc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ In Development -------------- +* Python 3.8+ is now required (support for Python 3.7 was dropped). * The old `email` field on the returned `ValidatedEmail` object, which in the previous version was superseded by `normalized`, will now raise a deprecation warning if used. See https://stackoverflow.com/q/879173 for strategies to suppress the DeprecationWarning. * A `__version__` module attribute is added. diff --git a/README.md b/README.md index 72b3b0f..1652fcf 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ email-validator: Validate Email Addresses ========================================= A robust email address syntax and deliverability validation library for -Python 3.7+ by [Joshua Tauberer](https://joshdata.me). +Python 3.8+ by [Joshua Tauberer](https://joshdata.me). This library validates that a string is of the form `name@example.com` and optionally checks that the domain name is set up to receive email. diff --git a/test_requirements.txt b/test_requirements.txt index b41edca..db9bbbd 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -7,18 +7,18 @@ # the earliest Python version we support, and some exception # messages may depend on package versions, so we pin versions # for reproducible testing.) -coverage==7.3.1 +coverage==7.3.2 dnspython==2.4.2 exceptiongroup==1.1.3 flake8==6.1.0 idna==3.4 iniconfig==2.0.0 mccabe==0.7.0 -mypy==1.5.1 +mypy==1.6.1 mypy-extensions==1.0.0 -packaging==23.1 +packaging==23.2 pluggy==1.3.0 -pycodestyle==2.11.0 +pycodestyle==2.11.1 pyflakes==3.1.0 pytest==7.4.2 pytest-cov==4.1.0 From 2a9653b8701db574e4ac69451908aef18eddab82 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sun, 2 Jul 2023 09:47:25 -0400 Subject: [PATCH 06/14] Remove .travis.yml since we now do CI with GitHub actions --- .travis.yml | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3a283dc..0000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -os: linux -dist: bionic -language: python -cache: pip - -python: -- '3.7' -- '3.8' -- '3.9' -- '3.10' -- '3.11' -- '3.12-dev' - -install: -- make install - -script: -- make typing -- make lint -- make test - -after_success: -- bash <(curl -s https://codecov.io/bash) From 371c12079fa10cd3f93ba68aaf149070a7119d2b Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sun, 2 Jul 2023 09:47:53 -0400 Subject: [PATCH 07/14] Mark the email address argument to validate_email as positional-only --- CHANGELOG.md | 1 + email_validator/validate_email.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4961fc5..744644f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ In Development * Python 3.8+ is now required (support for Python 3.7 was dropped). * The old `email` field on the returned `ValidatedEmail` object, which in the previous version was superseded by `normalized`, will now raise a deprecation warning if used. See https://stackoverflow.com/q/879173 for strategies to suppress the DeprecationWarning. * A `__version__` module attribute is added. +* The email address argument to validate_email is now marked as positional-only to better reflect the documented usage using the new Python 3.8 feature. 2.0.0 (April 15, 2023) ---------------------- diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py index 0d8f581..b33394a 100644 --- a/email_validator/validate_email.py +++ b/email_validator/validate_email.py @@ -7,8 +7,8 @@ def validate_email( email: Union[str, bytes], - # /, # not supported in Python 3.6, 3.7 - *, + /, # prior arguments are positional-only + *, # subsequent arguments are keyword-only allow_smtputf8: Optional[bool] = None, allow_empty_local: bool = False, allow_quoted_local: Optional[bool] = None, From 814b4884a6d6a804bd344101b0c9999d12f6d828 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sun, 2 Jul 2023 10:26:25 -0400 Subject: [PATCH 08/14] Use the new Python 3.8 walrus operator and simplify some if statements --- email_validator/syntax.py | 9 +++------ email_validator/validate_email.py | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/email_validator/syntax.py b/email_validator/syntax.py index abb4ea9..7287476 100644 --- a/email_validator/syntax.py +++ b/email_validator/syntax.py @@ -69,8 +69,7 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp # so if it was originally quoted (quoted_local_part is True) and this regex matches, # it's ok. # (RFC 5321 4.1.2 / RFC 5322 3.2.4). - m = DOT_ATOM_TEXT.match(local) - if m: + if DOT_ATOM_TEXT.match(local): # It's valid. And since it's just the permitted ASCII characters, # it's normalized and safe. If the local part was originally quoted, # the quoting was unnecessary and it'll be returned as normalized to @@ -89,8 +88,7 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp # RFC 6531 section 3.3. valid: Optional[str] = None requires_smtputf8 = False - m = DOT_ATOM_TEXT_INTL.match(local) - if m: + if DOT_ATOM_TEXT_INTL.match(local): # But international characters in the local part may not be permitted. if not allow_smtputf8: # Check for invalid characters against the non-internationalized @@ -347,8 +345,7 @@ def validate_email_domain_name(domain, test_environment=False, globally_delivera # Check the syntax of the string returned by idna.encode. # It should never fail. - m = DOT_ATOM_TEXT_HOSTNAME.match(ascii_domain) - if not m: + if not DOT_ATOM_TEXT_HOSTNAME.match(ascii_domain): raise EmailSyntaxError("The email address contains invalid characters after the @-sign after IDNA encoding.") # Check the length of the domain name in bytes. diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py index b33394a..0d2e7a8 100644 --- a/email_validator/validate_email.py +++ b/email_validator/validate_email.py @@ -58,8 +58,7 @@ def validate_email( # part if the local part is quoted. If the address is quoted, # split it at a non-escaped @-sign and unescape the escaping. quoted_local_part = False - m = QUOTED_LOCAL_PART_ADDR.match(email) - if m: + if m := QUOTED_LOCAL_PART_ADDR.match(email): quoted_local_part = True local_part, domain_part = m.groups() From 786defc7c20e22341b338f56c8334eb4c7fce711 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Thu, 19 Oct 2023 07:16:26 -0400 Subject: [PATCH 09/14] Improve some code comments, refactor some code, mention length checks in the README --- README.md | 8 +++ email_validator/syntax.py | 83 ++++++++++++++++++++++++--- email_validator/validate_email.py | 93 +++++++++---------------------- tests/test_syntax.py | 4 +- 4 files changed, 111 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 1652fcf..251e7a8 100644 --- a/README.md +++ b/README.md @@ -315,6 +315,14 @@ they are unnecessary. For IPv6 domain literals, the IPv6 address is normalized to condensed form. [RFC 2142](https://datatracker.ietf.org/doc/html/rfc2142) also requires lowercase normalization for some specific mailbox names like `postmaster@`. +### Length checks + +This library checks that the length of the email address is not longer than +the maximum length. The check is performed on the normalized form of the +address, which might be different from a string provided by a user. If you +send email to the original string and not the normalized address, the email +might be rejected because the original address could be too long. + Examples -------- diff --git a/email_validator/syntax.py b/email_validator/syntax.py index 7287476..fef785b 100644 --- a/email_validator/syntax.py +++ b/email_validator/syntax.py @@ -1,7 +1,8 @@ from .exceptions_types import EmailSyntaxError from .rfc_constants import EMAIL_MAX_LENGTH, LOCAL_PART_MAX_LENGTH, DOMAIN_MAX_LENGTH, \ DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT_RE, ATEXT_INTL_RE, ATEXT_HOSTNAME_INTL, QTEXT_INTL, \ - DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX, DOMAIN_LITERAL_CHARS + DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX, DOMAIN_LITERAL_CHARS, \ + QUOTED_LOCAL_PART_ADDR import re import unicodedata @@ -10,6 +11,35 @@ from typing import Optional +def split_email(email): + # Return the local part and domain part of the address and + # whether the local part was quoted as a three-tuple. + + # Typical email addresses have a single @-sign, but the + # awkward "quoted string" local part form (RFC 5321 4.1.2) + # allows @-signs (and escaped quotes) to appear in the local + # part if the local part is quoted. If the address is quoted, + # split it at a non-escaped @-sign and unescape the escaping. + if m := QUOTED_LOCAL_PART_ADDR.match(email): + local_part, domain_part = m.groups() + + # Since backslash-escaping is no longer needed because + # the quotes are removed, remove backslash-escaping + # to return in the normalized form. + import re + local_part = re.sub(r"\\(.)", "\\1", local_part) + + return local_part, domain_part, True + + else: + # Split at the one and only at-sign. + parts = email.split('@') + if len(parts) != 2: + raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.") + local_part, domain_part = parts + return local_part, domain_part, False + + def get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH): """Helper function to return an error message related to invalid length.""" diff = len(addr) - limit @@ -367,7 +397,7 @@ def validate_email_domain_name(domain, test_environment=False, globally_delivera raise EmailSyntaxError(f"After the @-sign, periods cannot be separated by so many characters {reason}.") if globally_deliverable: - # All publicly deliverable addresses have domain named with at least + # All publicly deliverable addresses have domain names with at least # one period, at least for gTLDs created since 2013 (per the ICANN Board # New gTLD Program Committee, https://www.icann.org/en/announcements/details/new-gtld-dotless-domain-names-prohibited-30-8-2013-en). # We'll consider the lack of a period a syntax error @@ -428,7 +458,48 @@ def validate_email_domain_name(domain, test_environment=False, globally_delivera } -def validate_email_domain_literal(domain_literal, allow_domain_literal=False): +def validate_email_length(addrinfo): + # If the email address has an ASCII representation, then we assume it may be + # transmitted in ASCII (we can't assume SMTPUTF8 will be used on all hops to + # the destination) and the length limit applies to ASCII characters (which is + # the same as octets). The number of characters in the internationalized form + # may be many fewer (because IDNA ASCII is verbose) and could be less than 254 + # Unicode characters, and of course the number of octets over the limit may + # not be the number of characters over the limit, so if the email address is + # internationalized, we can't give any simple information about why the address + # is too long. + if addrinfo.ascii_email and len(addrinfo.ascii_email) > EMAIL_MAX_LENGTH: + if addrinfo.ascii_email == addrinfo.normalized: + reason = get_length_reason(addrinfo.ascii_email) + elif len(addrinfo.normalized) > EMAIL_MAX_LENGTH: + # If there are more than 254 characters, then the ASCII + # form is definitely going to be too long. + reason = get_length_reason(addrinfo.normalized, utf8=True) + else: + reason = "(when converted to IDNA ASCII)" + raise EmailSyntaxError(f"The email address is too long {reason}.") + + # In addition, check that the UTF-8 encoding (i.e. not IDNA ASCII and not + # Unicode characters) is at most 254 octets. If the addres is transmitted using + # SMTPUTF8, then the length limit probably applies to the UTF-8 encoded octets. + # If the email address has an ASCII form that differs from its internationalized + # form, I don't think the internationalized form can be longer, and so the ASCII + # form length check would be sufficient. If there is no ASCII form, then we have + # to check the UTF-8 encoding. The UTF-8 encoding could be up to about four times + # longer than the number of characters. + # + # See the length checks on the local part and the domain. + if len(addrinfo.normalized.encode("utf8")) > EMAIL_MAX_LENGTH: + if len(addrinfo.normalized) > EMAIL_MAX_LENGTH: + # If there are more than 254 characters, then the UTF-8 + # encoding is definitely going to be too long. + reason = get_length_reason(addrinfo.normalized, utf8=True) + else: + reason = "(when encoded in bytes)" + raise EmailSyntaxError(f"The email address is too long {reason}.") + + +def validate_email_domain_literal(domain_literal): # This is obscure domain-literal syntax. Parse it and return # a compressed/normalized address. # RFC 5321 4.1.3 and RFC 5322 3.4.1. @@ -441,8 +512,6 @@ def validate_email_domain_literal(domain_literal, allow_domain_literal=False): addr = ipaddress.IPv4Address(domain_literal) except ValueError as e: raise EmailSyntaxError(f"The address in brackets after the @-sign is not valid: It is not an IPv4 address ({e}) or is missing an address literal tag.") - if not allow_domain_literal: - raise EmailSyntaxError("A bracketed IPv4 address after the @-sign is not allowed here.") # Return the IPv4Address object and the domain back unchanged. return { @@ -456,8 +525,6 @@ def validate_email_domain_literal(domain_literal, allow_domain_literal=False): addr = ipaddress.IPv6Address(domain_literal[5:]) except ValueError as e: raise EmailSyntaxError(f"The IPv6 address in brackets after the @-sign is not valid ({e}).") - if not allow_domain_literal: - raise EmailSyntaxError("A bracketed IPv6 address after the @-sign is not allowed here.") # Return the IPv6Address object and construct a normalized # domain literal. @@ -466,6 +533,8 @@ def validate_email_domain_literal(domain_literal, allow_domain_literal=False): "domain": f"[IPv6:{addr.compressed}]", } + # Nothing else is valid. + if ":" not in domain_literal: raise EmailSyntaxError("The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.") diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py index 0d2e7a8..d2791fe 100644 --- a/email_validator/validate_email.py +++ b/email_validator/validate_email.py @@ -1,8 +1,8 @@ from typing import Optional, Union from .exceptions_types import EmailSyntaxError, ValidatedEmail -from .syntax import validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, get_length_reason -from .rfc_constants import EMAIL_MAX_LENGTH, QUOTED_LOCAL_PART_ADDR, CASE_INSENSITIVE_MAILBOX_NAMES +from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length +from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES def validate_email( @@ -20,9 +20,9 @@ def validate_email( dns_resolver: Optional[object] = None ) -> ValidatedEmail: """ - Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of - information when the address is valid. The email argument can be a str or a bytes instance, - but if bytes it must be ASCII-only. This is the main method of this library. + Given an email address, and some options, returns a ValidatedEmail instance + with information about the address if it is valid or, if the address is not + valid, raises an EmailNotValidError. This is the main function of the module. """ # Fill in default values of arguments. @@ -52,26 +52,13 @@ def validate_email( except ValueError: raise EmailSyntaxError("The email address is not valid ASCII.") - # Typical email addresses have a single @-sign, but the - # awkward "quoted string" local part form (RFC 5321 4.1.2) - # allows @-signs (and escaped quotes) to appear in the local - # part if the local part is quoted. If the address is quoted, - # split it at a non-escaped @-sign and unescape the escaping. - quoted_local_part = False - if m := QUOTED_LOCAL_PART_ADDR.match(email): - quoted_local_part = True - local_part, domain_part = m.groups() - - # Remove backslashes. - import re - local_part = re.sub(r"\\(.)", "\\1", local_part) - - else: - # Split at the one and only at-sign. - parts = email.split('@') - if len(parts) != 2: - raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.") - local_part, domain_part = parts + # Split the address into the local part (before the @-sign) + # and the domain part (after the @-sign). Normally, there + # is only one @-sign. But the awkward "quoted string" local + # part form (RFC 5321 4.1.2) allows @-signs in the local + # part if the local part is quoted. + local_part, domain_part, is_quoted_local_part \ + = split_email(email) # Collect return values in this instance. ret = ValidatedEmail() @@ -84,13 +71,17 @@ def validate_email( local_part_info = validate_email_local_part(local_part, allow_smtputf8=allow_smtputf8, allow_empty_local=allow_empty_local, - quoted_local_part=quoted_local_part) - if quoted_local_part and not allow_quoted_local: - raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.") + quoted_local_part=is_quoted_local_part) ret.local_part = local_part_info["local_part"] ret.ascii_local_part = local_part_info["ascii_local_part"] ret.smtputf8 = local_part_info["smtputf8"] + # If a quoted local part isn't allowed but is present, now raise an exception. + # This is done after any exceptions raised by validate_email_local_part so + # that mandatory checks have highest precedence. + if is_quoted_local_part and not allow_quoted_local: + raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.") + # Some local parts are required to be case-insensitive, so we should normalize # to lowercase. # RFC 2142 @@ -107,7 +98,9 @@ def validate_email( elif domain_part.startswith("[") and domain_part.endswith("]"): # Parse the address in the domain literal and get back a normalized domain. - domain_part_info = validate_email_domain_literal(domain_part[1:-1], allow_domain_literal=allow_domain_literal) + domain_part_info = validate_email_domain_literal(domain_part[1:-1]) + if not allow_domain_literal: + raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.") ret.domain = domain_part_info["domain"] ret.ascii_domain = domain_part_info["domain"] # Domain literals are always ASCII. ret.domain_address = domain_part_info["domain_address"] @@ -131,48 +124,12 @@ def validate_email( else: ret.ascii_email = None - # If the email address has an ASCII representation, then we assume it may be - # transmitted in ASCII (we can't assume SMTPUTF8 will be used on all hops to - # the destination) and the length limit applies to ASCII characters (which is - # the same as octets). The number of characters in the internationalized form - # may be many fewer (because IDNA ASCII is verbose) and could be less than 254 - # Unicode characters, and of course the number of octets over the limit may - # not be the number of characters over the limit, so if the email address is - # internationalized, we can't give any simple information about why the address - # is too long. - # - # In addition, check that the UTF-8 encoding (i.e. not IDNA ASCII and not - # Unicode characters) is at most 254 octets. If the addres is transmitted using - # SMTPUTF8, then the length limit probably applies to the UTF-8 encoded octets. - # If the email address has an ASCII form that differs from its internationalized - # form, I don't think the internationalized form can be longer, and so the ASCII - # form length check would be sufficient. If there is no ASCII form, then we have - # to check the UTF-8 encoding. The UTF-8 encoding could be up to about four times - # longer than the number of characters. - # - # See the length checks on the local part and the domain. - if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH: - if ret.ascii_email == ret.normalized: - reason = get_length_reason(ret.ascii_email) - elif len(ret.normalized) > EMAIL_MAX_LENGTH: - # If there are more than 254 characters, then the ASCII - # form is definitely going to be too long. - reason = get_length_reason(ret.normalized, utf8=True) - else: - reason = "(when converted to IDNA ASCII)" - raise EmailSyntaxError(f"The email address is too long {reason}.") - if len(ret.normalized.encode("utf8")) > EMAIL_MAX_LENGTH: - if len(ret.normalized) > EMAIL_MAX_LENGTH: - # If there are more than 254 characters, then the UTF-8 - # encoding is definitely going to be too long. - reason = get_length_reason(ret.normalized, utf8=True) - else: - reason = "(when encoded in bytes)" - raise EmailSyntaxError(f"The email address is too long {reason}.") + # Check the length of the address. + validate_email_length(ret) if check_deliverability and not test_environment: # Validate the email address's deliverability using DNS - # and update the return dict with metadata. + # and update the returned ValidatedEmail object with metadata. if is_domain_literal: # There is nothing to check --- skip deliverability checks. diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 1c9659c..8709845 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -330,9 +330,9 @@ def test_domain_literal(): ('me@xn--0.tld', 'The part after the @-sign is not valid IDNA (Invalid A-label).'), ('me@yy--0.tld', 'An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.'), ('me@yy--0.tld', 'An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.'), - ('me@[127.0.0.1]', 'A bracketed IPv4 address after the @-sign is not allowed here.'), + ('me@[127.0.0.1]', 'A bracketed IP address after the @-sign is not allowed here.'), ('me@[127.0.0.999]', 'The address in brackets after the @-sign is not valid: It is not an IPv4 address (Octet 999 (> 255) not permitted in \'127.0.0.999\') or is missing an address literal tag.'), - ('me@[IPv6:::1]', 'A bracketed IPv6 address after the @-sign is not allowed here.'), + ('me@[IPv6:::1]', 'A bracketed IP address after the @-sign is not allowed here.'), ('me@[IPv6:::G]', 'The IPv6 address in brackets after the @-sign is not valid (Only hex digits permitted in \'G\' in \'::G\').'), ('me@[tag:text]', 'The part after the @-sign contains an invalid address literal tag in brackets.'), ('me@[untaggedtext]', 'The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.'), From 2c3501e367040978c4ecc00ddb7290ec50376aea Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sat, 21 Oct 2023 06:47:29 -0400 Subject: [PATCH 10/14] Fixes to debug helper ValidatedEmail.as_constructor --- email_validator/exceptions_types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/email_validator/exceptions_types.py b/email_validator/exceptions_types.py index 4b8f200..3f6409c 100644 --- a/email_validator/exceptions_types.py +++ b/email_validator/exceptions_types.py @@ -76,7 +76,7 @@ def __getattr__(self, key): return self.original if key == "email": return self.normalized - raise AttributeError() + raise AttributeError(key) @property def email(self): @@ -129,9 +129,10 @@ def as_constructor(self): + ",".join("\n {}={}".format( key, repr(getattr(self, key))) - for key in ('email', 'local_part', 'domain', + for key in ('normalized', 'local_part', 'domain', 'ascii_email', 'ascii_local_part', 'ascii_domain', 'smtputf8', 'mx', 'mx_fallback_type') + if hasattr(self, key) ) \ + ")" From c1f37d6fe8b4604579ee3b3f22255263e0822e50 Mon Sep 17 00:00:00 2001 From: PriteshJadhav132 <126667465+PriteshJadhav132@users.noreply.github.com> Date: Sat, 21 Oct 2023 22:09:20 +0530 Subject: [PATCH 11/14] Remove typo in README (#117) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 251e7a8..5912e0e 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ while True: ### Test addresses -This library rejects email addresess that use the [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml) `invalid`, `localhost`, `test`, and some others by raising `EmailSyntaxError`. This is to protect your system from abuse: You probably don't want a user to be able to cause an email to be sent to `localhost` (although they might be able to still do so via a malicious MX record). However, in your non-production test environments you may want to use `@test` or `@myname.test` email addresses. There are three ways you can allow this: +This library rejects email addresses that use the [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml) `invalid`, `localhost`, `test`, and some others by raising `EmailSyntaxError`. This is to protect your system from abuse: You probably don't want a user to be able to cause an email to be sent to `localhost` (although they might be able to still do so via a malicious MX record). However, in your non-production test environments you may want to use `@test` or `@myname.test` email addresses. There are three ways you can allow this: 1. Add `test_environment=True` to the call to `validate_email` (see above). 2. Set `email_validator.TEST_ENVIRONMENT` to `True` globally. From 36b06110f6b177410fc0f53126b5998620542693 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sun, 22 Oct 2023 07:25:16 -0400 Subject: [PATCH 12/14] 2.1.0 --- CHANGELOG.md | 4 ++-- README.md | 2 +- email_validator/version.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 744644f..02d2277 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ -In Development --------------- +2.1.0 (October 22, 2023) +------------------------ * Python 3.8+ is now required (support for Python 3.7 was dropped). * The old `email` field on the returned `ValidatedEmail` object, which in the previous version was superseded by `normalized`, will now raise a deprecation warning if used. See https://stackoverflow.com/q/879173 for strategies to suppress the DeprecationWarning. diff --git a/README.md b/README.md index 5912e0e..2c27c35 100644 --- a/README.md +++ b/README.md @@ -451,7 +451,7 @@ To release: * Publish a source and wheel distribution to pypi (see command below). ```sh -git tag v$(grep version setup.cfg | sed "s/.*= //") +git tag v$(cat email_validator/version.py | sed "s/.* = //" | sed 's/"//g') git push --tags ./release_to_pypi.sh ``` diff --git a/email_validator/version.py b/email_validator/version.py index 80476c8..9aa3f90 100644 --- a/email_validator/version.py +++ b/email_validator/version.py @@ -1 +1 @@ -__version__ = "2.0.0.post2" +__version__ = "2.1.0" From c3e81090641f3a1e96975ed150e0a416f6a96cb5 Mon Sep 17 00:00:00 2001 From: tianwei Date: Mon, 23 Oct 2023 18:02:47 +0800 Subject: [PATCH 13/14] Drop Python 3.7 from python_requires (#118) --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2394d15..a69971d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,7 @@ packages = find: install_requires = dnspython>=2.0.0 # optional if deliverability check isn't needed idna>=2.0.0 -python_requires = >=3.7 +python_requires = >=3.8 [options.package_data] * = py.typed From fd655c0967feb64f537db9342cf55c44b87fb80e Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Mon, 23 Oct 2023 06:04:32 -0400 Subject: [PATCH 14/14] 2.1.0.post1 --- email_validator/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/email_validator/version.py b/email_validator/version.py index 9aa3f90..acc96f7 100644 --- a/email_validator/version.py +++ b/email_validator/version.py @@ -1 +1 @@ -__version__ = "2.1.0" +__version__ = "2.1.0.post1"