@@ -22,6 +22,29 @@ def _pack_integer(data, pos, value):
2222 data[pos:pos + 4] = _pack_integer_func(value)
2323
2424
def _read_all_values(data, used=0):
    """Yield (key, value, pos) for every entry in `data`. No locking is performed.

    `data` is a sliceable buffer (the callers in this file pass the mmap or
    the bytes read from a file).  `used` is the number of leading bytes of
    `data` that hold valid content; when it is not positive it is read from
    the integer stored at offset 0.

    Entries start at offset 8.  Each one is a 4-byte key length, the utf-8
    encoded key padded so that length prefix plus key is a multiple of 8
    bytes, and then an 8-byte value.  The yielded `pos` is the offset of the
    value inside `data`.

    Raises RuntimeError when an entry does not fit inside the used part of
    the buffer or declares a negative key length.
    """
    corrupted = 'Read beyond file size detected, file is corrupted.'

    if used <= 0:
        # If not valid `used` value is passed in, read it from the file.
        used = _unpack_integer(data, 0)[0]

    # `used` comes from the header and may claim more bytes than the buffer
    # really holds (e.g. a truncated file), so never read past either bound.
    limit = min(used, len(data))

    pos = 8
    while pos < used:
        # The length prefix itself must be readable.
        if pos + 4 > limit:
            raise RuntimeError(corrupted)
        encoded_len = _unpack_integer(data, pos)[0]
        # A negative length could move `pos` backwards (or not at all) and
        # make this loop spin forever on corrupted input.
        if encoded_len < 0:
            raise RuntimeError(corrupted)

        # Padding is between 1 and 8 bytes, so that 4 + padded_len is always
        # a multiple of 8.
        padded_len = encoded_len + (8 - (encoded_len + 4) % 8)
        key_pos = pos + 4
        value_pos = key_pos + padded_len
        # check we are not reading beyond bounds: the whole entry, including
        # padding and the 8-byte value, has to fit.
        if value_pos + 8 > limit:
            raise RuntimeError(corrupted)

        encoded_key = data[key_pos:key_pos + encoded_len]
        value = _unpack_double(data, value_pos)[0]
        yield encoded_key.decode('utf-8'), value, value_pos
        pos = value_pos + 8
46+
47+
2548class MmapedDict(object):
2649 """A dict of doubles, backed by an mmapped file.
2750
@@ -55,6 +78,12 @@ def __init__(self, filename, read_mode=False):
5578 for key, _, pos in self._read_all_values():
5679 self._positions[key] = pos
5780
@staticmethod
def read_all_values_from_file(filename):
    """Return an iterator of (key, value, pos) for the entries in `filename`.

    The file is opened in binary mode and read into memory in one go; the
    bytes are then handed to the module-level `_read_all_values`, which
    takes the used size from the data itself because no `used` is passed.
    """
    with open(filename, 'rb') as source:
        contents = source.read()
    # The file is already closed here; parsing works on the in-memory copy.
    return _read_all_values(contents)
86+
5887 def _init_value(self, key):
5988 """Initialize a value. Lock must be held by caller."""
6089 encoded = key.encode('utf-8')
@@ -74,30 +103,10 @@ def _init_value(self, key):
74103
def _read_all_values(self):
    """Yield (key, value, pos). No locking is performed.

    Delegates the parsing of `self._m` (limited to `self._used` bytes) to
    the module-level `_read_all_values` function.

    Raises RuntimeError naming `self._fname` when the shared parser reports
    corrupted data.
    """
    try:
        for entry in _read_all_values(data=self._m, used=self._used):
            yield entry
    except RuntimeError:
        # The shared parser has no idea which file it is reading, so its
        # message is generic.  Re-raise with the file name so the corrupted
        # file can be identified from the error alone.
        msg = 'Read beyond file size detected, %s is corrupted.'
        raise RuntimeError(msg % self._fname)
98107
def read_all_values(self):
    """Yield (key, value). No locking is performed.

    Iterates over `self._read_all_values()` and drops the third element
    (the position) of every entry.
    """
    for key, value, _pos in self._read_all_values():
        yield (key, value)
103112
0 commit comments