@@ -22,6 +22,29 @@ def _pack_integer(data, pos, value):
2222 data [pos :pos + 4 ] = _pack_integer_func (value )
2323
2424
def _read_all_values(data, used=0):
    """Yield (key, value, pos) for each entry in `data`. No locking is performed.

    `data` is an indexable byte buffer, e.g. the raw bytes of a file or the
    map held by a MmapedDict. `used` is the offset just past the last entry;
    when it is not positive it is read from the integer stored at offset 0
    of `data`.

    Entries start at offset 8. Each one is a 4-byte key length, the UTF-8
    encoded key padded so that the value is 8-byte aligned, and then the
    8-byte value. The yielded `pos` is the offset of that value.

    Raises RuntimeError when an entry has a negative key length or does not
    fit entirely below `used`.
    """

    if used <= 0:
        # If not valid `used` value is passed in, read it from the file.
        used = _unpack_integer(data, 0)[0]

    pos = 8

    while pos < used:
        encoded_len = _unpack_integer(data, pos)[0]
        padded_len = encoded_len + (8 - (encoded_len + 4) % 8)
        # Check the whole entry (length prefix + padded key + value), not
        # just the key, so a truncated tail is reported as corruption
        # instead of failing inside the unpack helpers. A negative length
        # would move `pos` backwards, so reject it as well.
        if encoded_len < 0 or pos + 4 + padded_len + 8 > used:
            raise RuntimeError('Read beyond file size detected, file is corrupted.')
        pos += 4
        encoded_key = data[pos:pos + encoded_len]
        pos += padded_len
        value = _unpack_double(data, pos)[0]
        yield encoded_key.decode('utf-8'), value, pos
        pos += 8
46+
47+
2548class MmapedDict (object ):
2649 """A dict of doubles, backed by an mmapped file.
2750
@@ -55,6 +78,12 @@ def __init__(self, filename, read_mode=False):
5578 for key , _ , pos in self ._read_all_values ():
5679 self ._positions [key ] = pos
5780
@staticmethod
def read_all_values_from_file(filename):
    """Return an iterator of (key, value, pos) for the entries in `filename`.

    The whole file is read into memory in binary mode; entries are then
    parsed lazily from that in-memory copy.
    """
    with open(filename, 'rb') as source:
        raw = source.read()
    return _read_all_values(raw)
86+
5887 def _init_value (self , key ):
5988 """Initialize a value. Lock must be held by caller."""
6089 encoded = key .encode ('utf-8' )
@@ -74,30 +103,10 @@ def _init_value(self, key):
74103
def _read_all_values(self):
    """Yield (key, value, pos). No locking is performed."""
    # Delegate to the module-level reader, handing it this instance's
    # buffer and the amount of it that is currently in use.
    buf = self._m
    limit = self._used
    return _read_all_values(buf, limit)
98107
def read_all_values(self):
    """Yield (key, value). No locking is performed."""
    # Same entries as _read_all_values(), with the position dropped.
    for key, value, _pos in self._read_all_values():
        yield key, value
103112
0 commit comments