@@ -22,6 +22,29 @@ def _pack_integer(data, pos, value):
22
22
data [pos :pos + 4 ] = _pack_integer_func (value )
23
23
24
24
25
def _read_all_values(data, used=0):
    """Yield (key, value, pos) for every entry stored in `data`.

    Layout, as read by this function: the integer at offset 0 holds the
    number of bytes in use, and entries start at offset 8.  Each entry is a
    4-byte length, the UTF-8 encoded key, 1 to 8 bytes of padding so that
    the value starts on an 8-byte boundary, and finally an 8-byte double.

    No locking is performed.

    Args:
        data: Bytes-like buffer to parse; it is only indexed and sliced.
        used: Number of bytes of `data` that are in use.  When it is not
            positive, the count is read from offset 0 of `data` instead.

    Yields:
        Tuples of (decoded key, value, offset of the value inside `data`).

    Raises:
        RuntimeError: If an entry has a negative key length or does not fit
            entirely within the first `used` bytes.
    """
    if used <= 0:
        # No valid `used` value was passed in, so read it from the header.
        used = _unpack_integer(data, 0)[0]

    pos = 8

    while pos < used:
        encoded_len = _unpack_integer(data, pos)[0]
        # The length prefix plus the padded key always end on an 8-byte
        # boundary; an already aligned key still gets a full 8 bytes of
        # padding.
        padded_len = encoded_len + (8 - (encoded_len + 4) % 8)
        key_pos = pos + 4
        value_pos = key_pos + padded_len
        # Validate the whole entry (prefix, padded key and value), not just
        # the key length.  A negative length must be rejected as well: it
        # could move `pos` backwards or leave it unchanged, which would make
        # this loop spin forever on a corrupted file.
        if encoded_len < 0 or value_pos + 8 > used:
            raise RuntimeError('Read beyond file size detected, file is corrupted.')
        encoded_key = data[key_pos:key_pos + encoded_len]
        value = _unpack_double(data, value_pos)[0]
        yield encoded_key.decode('utf-8'), value, value_pos
        pos = value_pos + 8
46
+
47
+
25
48
class MmapedDict (object ):
26
49
"""A dict of doubles, backed by an mmapped file.
27
50
@@ -55,6 +78,12 @@ def __init__(self, filename, read_mode=False):
55
78
for key , _ , pos in self ._read_all_values ():
56
79
self ._positions [key ] = pos
57
80
81
@staticmethod
def read_all_values_from_file(filename):
    """Return a generator of (key, value, pos) for the entries in `filename`.

    The file is opened in binary mode and read into memory in one go; the
    bytes are then handed to the module-level `_read_all_values`, which
    takes the used size from the data itself.  Parsing is lazy, so errors
    in the content only surface while the result is being iterated.
    """
    with open(filename, 'rb') as source:
        contents = source.read()
    return _read_all_values(contents)
86
+
58
87
def _init_value (self , key ):
59
88
"""Initialize a value. Lock must be held by caller."""
60
89
encoded = key .encode ('utf-8' )
@@ -74,30 +103,10 @@ def _init_value(self, key):
74
103
75
104
def _read_all_values(self):
    """Yield (key, value, pos) for this dict's entries. No locking is performed.

    Delegates to the module-level `_read_all_values`, passing the mapped
    buffer (`self._m`) and the used-bytes count (`self._used`).
    """
    return _read_all_values(self._m, self._used)
98
107
99
108
def read_all_values(self):
    """Yield (key, value) pairs. No locking is performed.

    Iterates `self._read_all_values()` and drops the position that comes
    with each entry.
    """
    for key, value, _pos in self._read_all_values():
        yield key, value
103
112
0 commit comments