diff --git a/.travis.yml b/.travis.yml index 74b263e..100a5ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ python: - '3.6' - '3.7' - '3.8' +- '3.9' install: - make install diff --git a/README.md b/README.md index 2ef35de..929b525 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Key features: login forms or other uses related to identifying users. * Gives friendly error messages when validation fails (appropriate to show to end users). -* (optionally) Checks deliverability: Does the domain name resolve? +* (optionally) Checks deliverability: Does the domain name resolve? And you can override the default DNS resolver. * Supports internationalized domain names and (optionally) internationalized local parts. * Normalizes email addresses (super important for internationalized @@ -69,23 +69,27 @@ This validates the address and gives you its normalized form. You should put the normalized form in your database and always normalize before checking if an address is in your database. -The validator will accept internationalized email addresses, but email -addresses with non-ASCII characters in the *local* part of the address -(before the @-sign) require the -[SMTPUTF8](https://tools.ietf.org/html/rfc6531) extension which may not -be supported by your mail submission library or your outbound mail -server. 
If you know ahead of time that SMTPUTF8 is not supported then -**add the keyword argument allow\_smtputf8=False to fail validation for -addresses that would require SMTPUTF8**: +When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call: ```python -valid = validate_email(email, allow_smtputf8=False) +from email_validator import validate_email, caching_resolver + +resolver = caching_resolver(timeout=10) + +while True: + valid = validate_email(email, dns_resolver=resolver) ``` +The validator will accept internationalized email addresses, but not all +mail systems can send email to an address with non-ASCII characters in +the *local* part of the address (before the @-sign). See the `allow_smtputf8` +option below. + + Overview -------- -The module provides a single function `validate_email(email_address)` which +The module provides a function `validate_email(email_address)` which takes an email address (either a `str` or ASCII `bytes`) and: - Raises a `EmailNotValidError` with a helpful, human-readable error @@ -128,6 +132,9 @@ shown): `allow_empty_local=False`: Set to `True` to allow an empty local part (i.e. `@example.com`), e.g. for validating Postfix aliases. + +`dns_resolver=None`: Pass an instance of [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to control the DNS resolver including setting a timeout and [a cache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html). The `caching_resolver` function shown above is a helper function to construct a dns.resolver.Resolver with an [LRUCache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html#dns.resolver.LRUCache). Reuse the same resolver instance across calls to `validate_email` to make use of the cache.
+ Internationalized email addresses --------------------------------- diff --git a/email_validator/__init__.py b/email_validator/__init__.py index ded7899..f960f67 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -180,12 +180,22 @@ def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH): return reason.format(prefix, diff, suffix) +def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None): + """Return a dns.resolver.Resolver with a cache (an LRUCache by default) and `timeout` as its lifetime.""" + resolver = dns.resolver.Resolver() + # Compare against None: `cache or ...` would discard a caller-supplied cache object that happens to be falsy. + resolver.cache = cache if cache is not None else dns.resolver.LRUCache() + resolver.lifetime = timeout # timeout, in seconds + return resolver + + def validate_email( email, allow_smtputf8=True, allow_empty_local=False, check_deliverability=True, timeout=DEFAULT_TIMEOUT, + dns_resolver=None ): """ Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of @@ -273,7 +283,9 @@ def validate_email( if check_deliverability: # Validate the email address's deliverability and update the # return dict with metadata. - deliverability_info = validate_email_deliverability(ret["domain"], ret["domain_i18n"], timeout) + deliverability_info = validate_email_deliverability( + ret["domain"], ret["domain_i18n"], timeout, dns_resolver + ) if "mx" in deliverability_info: ret.mx = deliverability_info["mx"] ret.mx_fallback_type = deliverability_info["mx-fallback"] @@ -443,15 +455,23 @@ def validate_email_domain_part(domain): } -def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT): +def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT, dns_resolver=None): # Check that the domain resolves to an MX record. If there is no MX record, # try an A or AAAA record which is a deprecated fallback for deliverability. - def dns_resolver_resolve_shim(resolver, domain, record): + # If no dns.resolver.Resolver was given, get dnspython's default resolver. + # Override the default resolver's timeout, but only when one was given. This + # may affect other uses of dnspython in this process.
+ if dns_resolver is None: + dns_resolver = dns.resolver.get_default_resolver() + if timeout: + dns_resolver.lifetime = timeout + + def dns_resolver_resolve_shim(domain, record): try: # dns.resolver.Resolver.resolve is new to dnspython 2.x. # https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.resolve - return resolver.resolve(domain, record) + return dns_resolver.resolve(domain, record) except AttributeError: # dnspython 2.x is only available in Python 3.6 and later. For earlier versions # of Python, we maintain compatibility with dnspython 1.x which has a @@ -460,7 +480,7 @@ def dns_resolver_resolve_shim(resolver, domain, record): # which we prevent by adding a "." to the domain name to make it absolute. # dns.resolver.Resolver.query is deprecated in dnspython version 2.x. # https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.query - return resolver.query(domain + ".", record) + return dns_resolver.query(domain + ".", record) try: # We need a way to check how timeouts are handled in the tests. So we @@ -469,28 +489,23 @@ def dns_resolver_resolve_shim(resolver, domain, record): if getattr(validate_email_deliverability, 'TEST_CHECK_TIMEOUT', False): raise dns.exception.Timeout() - resolver = dns.resolver.get_default_resolver() - - if timeout: - resolver.lifetime = timeout - try: # Try resolving for MX records and get them in sorted priority order. - response = dns_resolver_resolve_shim(resolver, domain, "MX") + response = dns_resolver_resolve_shim(domain, "MX") mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response]) mx_fallback = None except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # If there was no MX record, fall back to an A record.
try: - response = dns_resolver_resolve_shim(resolver, domain, "A") + response = dns_resolver_resolve_shim(domain, "A") mtas = [(0, str(r)) for r in response] mx_fallback = "A" except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # If there was no A record, fall back to an AAAA record. try: - response = dns_resolver_resolve_shim(resolver, domain, "AAAA") + response = dns_resolver_resolve_shim(domain, "AAAA") mtas = [(0, str(r)) for r in response] mx_fallback = "AAAA" except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): diff --git a/setup.cfg b/setup.cfg index f6ab14a..43ce496 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,42 @@ -[bdist_wheel] -universal = 1 - [metadata] +name = email_validator +version = 1.1.3 +description = A robust email syntax and deliverability validation library for Python 2.x/3.x. +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/JoshData/python-email-validator +author = Joshua Tauberer +author_email = jt@occams.info +license = CC0 (copyright waived) license_file = LICENSE +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication + Programming Language :: Python :: 2 + Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.5 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Topic :: Software Development :: Libraries :: Python Modules +keywords = email address validator + +[options] +packages = find: +install_requires = + dnspython>=1.15.0 + idna>=2.0.0 +python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* + +[options.entry_points] +console_scripts = + email_validator=email_validator:main + +[bdist_wheel] +universal = 1 [flake8] 
max-line-length = 120 diff --git a/setup.py b/setup.py index 28dae7f..8bf1ba9 100644 --- a/setup.py +++ b/setup.py @@ -1,49 +1,2 @@ -# -*- coding: utf-8 -*- - -from setuptools import setup, find_packages -from codecs import open - -setup( - name='email-validator', - version='1.1.2', - - description='A robust email syntax and deliverability validation library for Python 2.x/3.x.', - long_description=open("README.md", encoding='utf-8').read(), - long_description_content_type="text/markdown", - url='https://github.com/JoshData/python-email-validator', - - author=u'Joshua Tauberer', - author_email=u'jt@occams.info', - license='CC0 (copyright waived)', - - # See https://pypi.org/pypi?%3Aaction=list_classifiers - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', - - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries :: Python Modules', - - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], - - keywords="email address validator", - - packages=find_packages(), - install_requires=[ - "idna>=2.0.0", - "dnspython>=1.15.0"], - - entry_points={ - 'console_scripts': [ - 'email_validator=email_validator:main', - ], - }, -) +from setuptools import setup +setup() diff --git a/tests/test_main.py b/tests/test_main.py index af975ba..d2fd923 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,7 +1,9 @@ +from unittest import mock +import dns.resolver import pytest from email_validator import EmailSyntaxError, EmailUndeliverableError, \ validate_email, validate_email_deliverability, \ - ValidatedEmail + caching_resolver, ValidatedEmail # Let's test main but rename it to be clear 
from email_validator import main as validator_main @@ -344,3 +346,27 @@ def test_main_output_shim(monkeypatch, capsys): # The \n is part of the print statement, not part of the string, which is what the b'...' is # Since we're mocking py 2.7 here instead of actually using 2.7, this was the closest I could get assert stdout == "b'An email address cannot have a period immediately after the @-sign.'\n" + + +@mock.patch("dns.resolver.LRUCache.put") +def test_validate_email__with_caching_resolver(mocked_put): + dns_resolver = caching_resolver() + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_put.called + + with mock.patch("dns.resolver.LRUCache.get") as mocked_get: + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_get.called + + +@mock.patch("dns.resolver.LRUCache.put") +def test_validate_email__with_configured_resolver(mocked_put): + dns_resolver = dns.resolver.Resolver() + dns_resolver.lifetime = 10 + dns_resolver.cache = dns.resolver.LRUCache(max_size=1000) + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_put.called + + with mock.patch("dns.resolver.LRUCache.get") as mocked_get: + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_get.called