8000 gh-61460: Stronger HMAC in multiprocessing (#20380) · python/cpython@3ed57e4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3ed57e4

Browse files
tirangpshead
andauthored
gh-61460: Stronger HMAC in multiprocessing (#20380)
bpo-17258: `multiprocessing` now supports stronger HMAC algorithms for inter-process connection authentication rather than only HMAC-MD5. Signed-off-by: Christian Heimes <christian@python.org> gpshead: I Reworked to be more robust while keeping the idea. The protocol modification idea remains, but we now take advantage of the message length as an indicator of legacy vs modern protocol version. No more regular expression usage. We now default to HMAC-SHA256, but do so in a way that will be compatible when communicating with older clients or older servers. No protocol transition period is needed. More integration tests to verify these claims remain true are required. I'm unaware of anyone depending on multiprocessing connections between different Python versions. --------- Signed-off-by: Christian Heimes <christian@python.org> Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
1 parent 12f1581 commit 3ed57e4

File tree

3 files changed

+200
-41
lines changed

3 files changed

+200
-41
lines changed

Lib/multiprocessing/connection.py

Lines changed: 151 additions & 31 deletions
10000
Original file line numberDiff line numberDiff line change
@@ -722,11 +722,11 @@ def PipeClient(address):
722722
# Authentication stuff
723723
#
724724

725-
MESSAGE_LENGTH = 20
725+
MESSAGE_LENGTH = 40 # MUST be > 20
726726

727-
CHALLENGE = b'#CHALLENGE#'
728-
WELCOME = b'#WELCOME#'
729-
FAILURE = b'#FAILURE#'
727+
_CHALLENGE = b'#CHALLENGE#'
728+
_WELCOME = b'#WELCOME#'
729+
_FAILURE = b'#FAILURE#'
730730

731731
# multiprocessing.connection Authentication Handshake Protocol Description
732732
# (as documented for reference after reading the existing code)
@@ -750,7 +750,12 @@ def PipeClient(address):
750750
# ------------------------------ ---------------------------------------
751751
# 0. Open a connection on the pipe.
752752
# 1. Accept connection.
753-
# 2. New random 20 bytes -> MESSAGE
753+
# 2. Random 20+ bytes -> MESSAGE
754+
# Modern servers always send
755+
# more than 20 bytes and include
756+
# a {digest} prefix on it with
757+
# their preferred HMAC digest.
758+
# Legacy ones send ==20 bytes.
754759
# 3. send 4 byte length (net order)
755760
# prefix followed by:
756761
# b'#CHALLENGE#' + MESSAGE
@@ -763,14 +768,32 @@ def PipeClient(address):
763768
# 6. Assert that M1 starts with:
764769
# b'#CHALLENGE#'
765770
# 7. Strip that prefix from M1 into -> M2
766-
# 8. Compute HMAC-MD5 of AUTHKEY, M2 -> C_DIGEST
771+
# 7.1. Parse M2: if it is exactly 20 bytes in
772+
# length this indicates a legacy server
773+
# supporting only HMAC-MD5. Otherwise the
774+
# 7.2. preferred digest is looked up from an
775+
# expected "{digest}" prefix on M2. No prefix
776+
# or unsupported digest? <- AuthenticationError
777+
# 7.3. Put divined algorithm name in -> D_NAME
778+
# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST
767779
# 9. Send 4 byte length prefix (net order)
768780
# followed by C_DIGEST bytes.
769-
# 10. Compute HMAC-MD5 of AUTHKEY,
770-
# MESSAGE into -> M_DIGEST.
771-
# 11. Receive 4 or 4+8 byte length
781+
# 10. Receive 4 or 4+8 byte length
772782
# prefix (#4 dance) -> SIZE.
773-
# 12. Receive min(SIZE, 256) -> C_D.
783+
# 11. Receive min(SIZE, 256) -> C_D.
784+
# 11.1. Parse C_D: legacy servers
785+
# accept it as is, "md5" -> D_NAME
786+
# 11.2. modern servers check the length
787+
# of C_D, IF it is 16 bytes?
788+
# 11.2.1. "md5" -> D_NAME
789+
# and skip to step 12.
790+
# 11.3. longer? expect and parse a "{digest}"
791+
# prefix into -> D_NAME.
792+
# Strip the prefix and store remaining
793+
# bytes in -> C_D.
794+
# 11.4. Don't like D_NAME? <- AuthenticationError
795+
# 12. Compute HMAC-D_NAME of AUTHKEY,
796+
# MESSAGE into -> M_DIGEST.
774797
# 13. Compare M_DIGEST == C_D:
775798
# 14a: Match? Send length prefix &
776799
# b'#WELCOME#'
@@ -787,42 +810,139 @@ def PipeClient(address):
787810
#
788811
# If this RETURNed, the connection remains open: it has been authenticated.
789812
#
790-
# Length prefixes are used consistently even though every step so far has
791-
# always been a singular specific fixed length. This may help us evolve
792-
# the protocol in the future without breaking backwards compatibility.
793-
#
794-
# Similarly the initial challenge message from the serving side has always
795-
# been 20 bytes, but clients can accept a 100+ so using the length of the
796-
# opening challenge message as an indicator of protocol version may work.
813+
# Length prefixes are used consistently. Even on the legacy protocol, this
814+
# was good fortune and allowed us to evolve the protocol by using the length
815+
# of the opening challenge or length of the returned digest as a signal as
816+
# to which protocol the other end supports.
817+
818+
_ALLOWED_DIGESTS = frozenset(
819+
{b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'})
820+
_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS)
821+
822+
# Old hmac-md5 only server versions from Python <=3.11 sent a message of this
823+
# length. It happens to not match the length of any supported digest so we can
824+
# use a message of this length to indicate that we should work in backwards
825+
# compatible md5-only mode without a {digest_name} prefix on our response.
826+
_MD5ONLY_MESSAGE_LENGTH = 20
827+
_MD5_DIGEST_LEN = 16
828+
_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN)
829+
830+
831+
def _get_digest_name_and_payload(message: bytes) -> (str, bytes):
832+
"""Returns a digest name and the payload for a response hash.
833+
834+
If a legacy protocol is detected based on the message length
835+
or contents the digest name returned will be empty to indicate
836+
legacy mode where MD5 and no digest prefix should be sent.
837+
"""
838+
# modern message format: b"{digest}payload" longer than 20 bytes
839+
# legacy message format: 16 or 20 byte b"payload"
840+
if len(message) in _LEGACY_LENGTHS:
841+
# Either this was a legacy server challenge, or we're processing
842+
# a reply from a legacy client that sent an unprefixed 16-byte
843+
# HMAC-MD5 response. All messages using the modern protocol will
844+
# be longer than either of these lengths.
845+
return '', message
846+
if (message.startswith(b'{') and
847+
(curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0):
848+
digest = message[1:curly]
849+
if digest in _ALLOWED_DIGESTS:
850+
payload = message[curly+1:]
851+
return digest.decode('ascii'), payload
852+
raise AuthenticationError(
853+
'unsupported message length, missing digest prefix, '
854+
f'or unsupported digest: {message=}')
855+
856+
857+
def _create_response(authkey, message):
858+
"""Create a MAC based on authkey and message
859+
860+
The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or
861+
the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response
862+
is the raw MAC, otherwise the response is prefixed with '{digest_name}',
863+
e.g. b'{sha256}abcdefg...'
864+
865+
Note: The MAC protects the entire message including the digest_name prefix.
866+
"""
867+
import hmac
868+
digest_name = _get_digest_name_and_payload(message)[0]
869+
# The MAC protects the entire message: digest header and payload.
870+
if not digest_name:
871+
# Legacy server without a {digest} prefix on message.
872+
# Generate a legacy non-prefixed HMAC-MD5 reply.
873+
try:
874+
return hmac.new(authkey, message, 'md5').digest()
875+
except ValueError:
876+
# HMAC-MD5 is not available (FIPS mode?), fall back to
877+
# HMAC-SHA2-256 modern protocol. The legacy server probably
878+
# doesn't support it and will reject us anyways. :shrug:
879+
digest_name = 'sha256'
880+
# Modern protocol, indicate the digest used in the reply.
881+
response = hmac.new(authkey, message, digest_name).digest()
882+
return b'{%s}%s' % (digest_name.encode('ascii'), response)
883+
884+
885+
def _verify_challenge(authkey, message, response):
886+
"""Verify MAC challenge
887+
888+
If our message did not include a digest_name prefix, the client is allowed
889+
to select a stronger digest_name from _ALLOWED_DIGESTS.
890+
891+
In case our message is prefixed, a client cannot downgrade to a weaker
892+
algorithm, because the MAC is calculated over the entire message
893+
including the '{digest_name}' prefix.
894+
"""
895+
import hmac
896+
response_digest, response_mac = _get_digest_name_and_payload(response)
897+
response_digest = response_digest or 'md5'
898+
try:
899+
expected = hmac.new(authkey, message, response_digest).digest()
900+
except ValueError:
901+
raise AuthenticationError(f'{response_digest=} unsupported')
902+
if len(expected) != len(response_mac):
903+
raise AuthenticationError(
904+
f'expected {response_digest!r} of length {len(expected)} '
905+
f'got {len(response_mac)}')
906+
if not hmac.compare_digest(expected, response_mac):
907+
raise AuthenticationError('digest received was wrong')
797908

798909

799-
def deliver_challenge(connection, authkey):
800-
import hmac
910+
def deliver_challenge(connection, authkey: bytes, digest_name='sha256'):
801911
if not isinstance(authkey, bytes):
802912
raise ValueError(
803913
"Authkey must be bytes, not {0!s}".format(type(authkey)))
914+
assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint"
804915
message = os.urandom(MESSAGE_LENGTH)
805-
connection.send_bytes(CHALLENGE + message)
806-
digest = hmac.new(authkey, message, 'md5').digest()
916+
message = b'{%s}%s' % (digest_name.encode('ascii'), message)
917+
# Even when sending a challenge to a legacy client that does not support
918+
# digest prefixes, they'll take the entire thing as a challenge and
919+
# respond to it with a raw HMAC-MD5.
920+
connection.send_bytes(_CHALLENGE + message)
807921
response = connection.recv_bytes(256) # reject large message
808-
if response == digest:
809-
connection.send_bytes(WELCOME)
922+
try:
923+
_verify_challenge(authkey, message, response)
924+
except AuthenticationError:
925+
connection.send_bytes(_FAILURE)
926+
raise
810927
else:
811-
connection.send_bytes(FAILURE)
812-
raise AuthenticationError('digest received was wrong')
928+
connection.send_bytes(_WELCOME)
813929

814-
def answer_challenge(connection, authkey):
815-
import hmac
930+
931+
def answer_challenge(connection, authkey: bytes):
816932
if not isinstance(authkey, bytes):
817933
raise ValueError(
818934
"Authkey must be bytes, not {0!s}".format(type(authkey)))
819935
message = connection.recv_bytes(256) # reject large message
820-
assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message
821-
message = message[len(CHALLENGE):]
822-
digest = hmac.new(authkey, message, 'md5').digest()
936+
if not message.startswith(_CHALLENGE):
937+
raise AuthenticationError(
938+
f'Protocol error, expected challenge: {message=}')
939+
message = message[len(_CHALLENGE):]
940+
if len(message) < _MD5ONLY_MESSAGE_LENGTH:
941+
raise AuthenticationError('challenge too short: {len(message)} bytes')
942+
digest = _create_response(authkey, message)
823943
connection.send_bytes(digest)
824944
response = connection.recv_bytes(256) # reject large message
825-
if response != WELCOME:
945+
if response != _WELCOME:
826946
raise AuthenticationError('digest sent was rejected')
827947

828948
#

Lib/test/_test_multiprocessing.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import multiprocessing.managers
4949
import multiprocessing.pool
5050
import multiprocessing.queues
51+
from multiprocessing.connection import wait, AuthenticationError
5152

5253
from multiprocessing import util
5354

@@ -131,8 +132,6 @@ def _resource_unlink(name, rtype):
131132

132133
WIN32 = (sys.platform == "win32")
133134

134-
from multiprocessing.connection import wait
135-
136135
def wait_for_handle(handle, timeout):
137136
if timeout is not None and timeout < 0.0:
138137
timeout = None
@@ -3042,7 +3041,7 @@ def test_remote(self):
30423041
del queue
30433042

30443043

3045-
@hashlib_helper.requires_hashdigest('md5')
3044+
@hashlib_helper.requires_hashdigest('sha256')
30463045
class _TestManagerRestart(BaseTestCase):
30473046

30483047
@classmethod
@@ -3531,7 +3530,7 @@ def test_dont_merge(self):
35313530
#
35323531

35333532
@unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction")
3534-
@hashlib_helper.requires_hashdigest('md5')
3533+
@hashlib_helper.requires_hashdigest('sha256')
35353534
class _TestPicklingConnections(BaseTestCase):
35363535

35373536
ALLOWED_TYPES = ('processes',)
@@ -3834,7 +3833,7 @@ def test_copy(self):
38343833

38353834

38363835
@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory")
3837-
@hashlib_helper.requires_hashdigest('md5')
3836+
@hashlib_helper.requires_hashdigest('sha256')
38383837
class _TestSharedMemory(BaseTestCase):
38393838

38403839
ALLOWED_TYPES = ('processes',)
@@ -4636,7 +4635,7 @@ def test_invalid_handles(self):
46364635

46374636

46384637

4639-
@hashlib_helper.requires_hashdigest('md5')
4638+
@hashlib_helper.requires_hashdigest('sha256')
46404639
class OtherTest(unittest.TestCase):
46414640
# TODO: add more tests for deliver/answer challenge.
46424641
def test_deliver_challenge_auth_failure(self):
@@ -4656,7 +4655,7 @@ def __init__(self):
46564655
def recv_bytes(self, size):
46574656
self.count += 1
46584657
if self.count == 1:
4659-
return multiprocessing.connection.CHALLENGE
4658+
return multiprocessing.connection._CHALLENGE
46604659
elif self.count == 2:
46614660
return b'something bogus'
46624661
return b''
@@ -4666,14 +4665,52 @@ def send_bytes(self, data):
46664665
multiprocessing.connection.answer_challenge,
46674666
_FakeConnection(), b'abc')
46684667

4668+
4669+
@hashlib_helper.requires_hashdigest('md5')
4670+
@hashlib_helper.requires_hashdigest('sha256')
4671+
class ChallengeResponseTest(unittest.TestCase):
4672+
authkey = b'supadupasecretkey'
4673+
4674+
def create_response(self, message):
4675+
return multiprocessing.connection._create_response(
4676+
self.authkey, message
4677+
)
4678+
4679+
def verify_challenge(self, message, response):
4680+
return multiprocessing.connection._verify_challenge(
4681+
self.authkey, message, response
4682+
)
4683+
4684+
def test_challengeresponse(self):
4685+
for algo in [None, "md5", "sha256"]:
4686+
with self.subTest(f"{algo=}"):
4687+
msg = b'is-twenty-bytes-long' # The length of a legacy message.
4688+
if algo:
4689+
prefix = b'{%s}' % algo.encode("ascii")
4690+
else:
4691+
prefix = b''
4692+
msg = prefix + msg
4693+
response = self.create_response(msg)
4694+
if not response.startswith(prefix):
4695+
self.fail(response)
4696+
self.verify_challenge(msg, response)
4697+
4698+
# TODO(gpshead): We need integration tests for handshakes between modern
4699+
# deliver_challenge() and verify_response() code and connections running a
4700+
# test-local copy of the legacy Python <=3.11 implementations.
4701+
4702+
# TODO(gpshead): properly annotate tests for requires_hashdigest rather than
4703+
# only running these on a platform supporting everything. otherwise logic
4704+
# issues preventing it from working on FIPS mode setups will be hidden.
4705+
46694706
#
46704707
# Test Manager.start()/Pool.__init__() initializer feature - see issue 5585
46714708
#
46724709

46734710
def initializer(ns):
46744711
ns.test += 1
46754712

4676-
@hashlib_helper.requires_hashdigest('md5')
4713+
@hashlib_helper.requires_hashdigest('sha256')
46774714
class TestInitializers(unittest.TestCase):
46784715
def setUp(self):
46794716
self.mgr = multiprocessing.Manager()
@@ -5537,7 +5574,7 @@ def is_alive(self):
55375574
any(process.is_alive() for process in forked_processes))
55385575

55395576

5540-
@hashlib_helper.requires_hashdigest('md5')
5577+
@hashlib_helper.requires_hashdigest('sha256')
55415578
class TestSyncManagerTypes(unittest.TestCase):
55425579
"""Test all the types which can be shared between a parent and a
55435580
child process by using a manager which acts as an intermediary
@@ -5969,7 +6006,7 @@ def install_tests_in_module_dict(remote_globs, start_method):
59696006
class Temp(base, Mixin, unittest.TestCase):
59706007
pass
59716008
if type_ == 'manager':
5972-
Temp = hashlib_helper.requires_hashdigest('md5')(Temp)
6009+
Temp = hashlib_helper.requires_hashdigest('sha256')(Temp)
59736010
Temp.__name__ = Temp.__qualname__ = newname
59746011
Temp.__module__ = __module__
59756012
remote_globs[newname] = Temp
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:mod:`multiprocessing` now supports stronger HMAC algorithms for inter-process
2+
connection authentication rather than only HMAC-MD5.

0 commit comments

Comments
 (0)
0