gh-128150: Improve performances of `uuid.uuid*` constructor functions… · python/cpython@6ff8f82 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6ff8f82

Browse files
authored
gh-128150: Improve performances of uuid.uuid* constructor functions. (#128151)
We introduce a private constructor `UUID._from_int()` for RFC 4122/9562 UUIDs, which takes the integral UUID value as input. The caller must ensure that the variant and version bits of that value are already set correctly. We also make `UUID.__init__()` slightly more efficient.
1 parent 39fc7ef commit 6ff8f82

File tree

3 files changed

+76
-27
lines changed

3 files changed

+76
-27
lines changed

Doc/whatsnew/3.14.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,22 @@ io
717717
file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in
718718
:gh:`120754` and :gh:`90102`.)
719719

720+
721+
uuid
722+
----
723+
724+
* Improve generation of :class:`~uuid.UUID` objects via their dedicated
725+
functions:
726+
727+
* :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are both roughly 40% faster
728+
for 16-byte names and 20% faster for 1024-byte names. Performance for
729+
longer names remains unchanged.
730+
* :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 40% faster
731+
respectively.
732+
733+
(Contributed by Bénédikt Tran in :gh:`128150`.)
734+
735+
720736
Deprecated
721737
==========
722738

Lib/uuid.py

Lines changed: 58 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,17 @@ class SafeUUID:
8585
unknown = None
8686

8787

88+
_UINT_128_MAX = (1 << 128) - 1
89+
# 128-bit mask to clear the variant and version bits of a UUID integral value
90+
_RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48))
91+
# RFC 4122 variant bits and version bits to activate on a UUID integral value.
92+
_RFC_4122_VERSION_1_FLAGS = ((1 << 76) | (0x8000 << 48))
93+
_RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
94+
_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
95+
_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
96+
_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
97+
98+
8899
class UUID:
89100
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
90101
UUID objects are immutable, hashable, and usable as dictionary keys.
@@ -174,57 +185,69 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
174185
if [hex, bytes, bytes_le, fields, int].count(None) != 4:
175186
raise TypeError('one of the hex, bytes, bytes_le, fields, '
176187
'or int arguments must be given')
177-
if hex is not None:
188+
if int is not None:
189+
pass
190+
elif hex is not None:
178191
hex = hex.replace('urn:', '').replace('uuid:', '')
179192
hex = hex.strip('{}').replace('-', '')
180193
if len(hex) != 32:
181194
raise ValueError('badly formed hexadecimal UUID string')
182195
int = int_(hex, 16)
183-
if bytes_le is not None:
196+
elif bytes_le is not None:
184197
if len(bytes_le) != 16:
185198
raise ValueError('bytes_le is not a 16-char string')
199+
assert isinstance(bytes_le, bytes_), repr(bytes_le)
186200
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
187201
bytes_le[8-1:6-1:-1] + bytes_le[8:])
188-
if bytes is not None:
202+
int = int_.from_bytes(bytes) # big endian
203+
elif bytes is not None:
189204
if len(bytes) != 16:
190205
raise ValueError('bytes is not a 16-char string')
191206
assert isinstance(bytes, bytes_), repr(bytes)
192207
int = int_.from_bytes(bytes) # big endian
193-
if fields is not None:
208+
elif fields is not None:
194209
if len(fields) != 6:
195210
raise ValueError('fields is not a 6-tuple')
196211
(time_low, time_mid, time_hi_version,
197212
clock_seq_hi_variant, clock_seq_low, node) = fields
198-
if not 0 <= time_low < 1<<32:
213+
if not 0 <= time_low < (1 << 32):
199214
raise ValueError('field 1 out of range (need a 32-bit value)')
200-
if not 0 <= time_mid < 1<<16:
215+
if not 0 <= time_mid < (1 << 16):
201216
raise ValueError('field 2 out of range (need a 16-bit value)')
202-
if not 0 <= time_hi_version < 1<<16:
217+
if not 0 <= time_hi_version < (1 << 16):
203218
raise ValueError('field 3 out of range (need a 16-bit value)')
204-
if not 0 <= clock_seq_hi_variant < 1<<8:
219+
if not 0 <= clock_seq_hi_variant < (1 << 8):
205220
raise ValueError('field 4 out of range (need an 8-bit value)')
206-
if not 0 <= clock_seq_low < 1<<8:
221+
if not 0 <= clock_seq_low < (1 << 8):
207222
raise ValueError('field 5 out of range (need an 8-bit value)')
208-
if not 0 <= node < 1<<48:
223+
if not 0 <= node < (1 << 48):
209224
raise ValueError('field 6 out of range (need a 48-bit value)')
210225
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
211226
int = ((time_low << 96) | (time_mid << 80) |
212227
(time_hi_version << 64) | (clock_seq << 48) | node)
213-
if int is not None:
214-
if not 0 <= int < 1<<128:
215-
raise ValueError('int is out of range (need a 128-bit value)')
228+
if not 0 <= int <= _UINT_128_MAX:
229+
raise ValueError('int is out of range (need a 128-bit value)')
216230
if version is not None:
217231
if not 1 <= version <= 8:
218232
raise ValueError('illegal version number')
233+
# clear the variant and the version number bits
234+
int &= _RFC_4122_CLEARFLAGS_MASK
219235
# Set the variant to RFC 4122/9562.
220-
int &= ~(0xc000 << 48)
221-
int |= 0x8000 << 48
236+
int |= 0x8000_0000_0000_0000 # (0x8000 << 48)
222237
# Set the version number.
223-
int &= ~(0xf000 << 64)
224238
int |= version << 76
225239
object.__setattr__(self, 'int', int)
226240
object.__setattr__(self, 'is_safe', is_safe)
227241

242+
@classmethod
243+
def _from_int(cls, value):
244+
"""Create a UUID from an integer *value*. Internal use only."""
245+
assert 0 <= value <= _UINT_128_MAX, repr(value)
246+
self = object.__new__(cls)
247+
object.__setattr__(self, 'int', value)
248+
object.__setattr__(self, 'is_safe', SafeUUID.unknown)
249+
return self
250+
228251
def __getstate__(self):
229252
d = {'int': self.int}
230253
if self.is_safe != SafeUUID.unknown:
@@ -700,24 +723,30 @@ def uuid3(namespace, name):
700723
"""Generate a UUID from the MD5 hash of a namespace UUID and a name."""
701724
if isinstance(name, str):
702725
name = bytes(name, "utf-8")
703-
from hashlib import md5
704-
digest = md5(
705-
namespace.bytes + name,
706-
usedforsecurity=False
707-
).digest()
708-
return UUID(bytes=digest[:16], version=3)
726+
import hashlib
727+
h = hashlib.md5(namespace.bytes + name, usedforsecurity=False)
728+
int_uuid_3 = int.from_bytes(h.digest())
729+
int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK
730+
int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS
731+
return UUID._from_int(int_uuid_3)
709732

710733
def uuid4():
711734
"""Generate a random UUID."""
712-
return UUID(bytes=os.urandom(16), version=4)
735+
int_uuid_4 = int.from_bytes(os.urandom(16))
736+
int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK
737+
int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS
738+
return UUID._from_int(int_uuid_4)
713739

714740
def uuid5(namespace, name):
715741
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
716742
if isinstance(name, str):
717743
name = bytes(name, "utf-8")
718-
from hashlib import sha1
719-
hash = sha1(namespace.bytes + name).digest()
720-
return UUID(bytes=hash[:16], version=5)
744+
import hashlib
745+
h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False)
746+
int_uuid_5 = int.from_bytes(h.digest()[:16])
747+
int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK
748+
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
749+
return UUID._from_int(int_uuid_5)
721750

722751
def uuid8(a=None, b=None, c=None):
723752
"""Generate a UUID from three custom blocks.
@@ -740,7 +769,9 @@ def uuid8(a=None, b=None, c=None):
740769
int_uuid_8 = (a & 0xffff_ffff_ffff) << 80
741770
int_uuid_8 |= (b & 0xfff) << 64
742771
int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff
743-
return UUID(int=int_uuid_8, version=8)
772+
# by construction, the variant and version bits are already cleared
773+
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
774+
return UUID._from_int(int_uuid_8)
744775

745776
def main():
746777
"""Run the uuid command line interface."""
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up generation of version 3, 4, 5, and 8 :class:`~uuid.UUID` objects
2+
via their dedicated functions by 30%. Patch by Bénédikt Tran.

0 commit comments

Comments
 (0)
0