8000 bpo-40755: Add missing multiset operations to Counter() (GH-20339) · python/cpython@6039851 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6039851

Browse files
authored
bpo-40755: Add missing multiset operations to Counter() (GH-20339)
1 parent 0de437d commit 6039851

File tree

4 files changed

+206
-6
lines changed

4 files changed

+206
-6
lines changed

Doc/library/collections.rst

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,47 @@ For example::
290290
>>> sorted(c.elements())
291291
['a', 'a', 'a', 'a', 'b', 'b']
292292

293+
.. method:: isdisjoint(other)
294+
295+
True if none of the elements in *self* overlap with those in *other*.
296+
Negative or missing counts are ignored.
297+
Logically equivalent to: ``not (+self) & (+other)``
298+
299+
.. versionadded:: 3.10
300+
301+
.. method:: isequal(other)
302+
303+
Test whether counts agree exactly.
304+
Negative or missing counts are treated as zero.
305+
306+
This method works differently than the inherited :meth:`__eq__` method
307+
which treats negative or missing counts as distinct from zero::
308+
309+
>>> Counter(a=1, b=0).isequal(Counter(a=1))
310+
True
311+
>>> Counter(a=1, b=0) == Counter(a=1)
312+
False
313+
314+
Logically equivalent to: ``+self == +other``
315+
316+
.. versionadded:: 3.10
317+
318+
.. method:: issubset(other)
319+
320+
True if the counts in *self* are less than or equal to those in *other*.
321+
Negative or missing counts are treated as zero.
322+
Logically equivalent to: ``not self - (+other)``
323+
324+
.. versionadded:: 3.10
325+
326+
.. method:: issuperset(other)
327+
328+
True if the counts in *self* are greater than or equal to those in *other*.
329+
Negative or missing counts are treated as zero.
330+
Logically equivalent to: ``not other - (+self)``
331+
332+
.. versionadded:: 3.10
333+
293334
.. method:: most_common([n])
294335

295336
Return a list of the *n* most common elements and their counts from the

Lib/collections/__init__.py

Lines changed: 104 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -710,12 +710,24 @@ def __repr__(self):
710710
# To strip negative and zero counts, add-in an empty counter:
711711
# c += Counter()
712712
#
713-
# Rich comparison operators for multiset subset and superset tests
714-
# are deliberately omitted due to semantic conflicts with the
715-
# existing inherited dict equality method. Subset and superset
716-
# semantics ignore zero counts and require that p≤q ∧ p≥q → p=q;
717-
# however, that would not be the case for p=Counter(a=1, b=0)
718-
# and q=Counter(a=1) where the dictionaries are not equal.
713+
# When the multiplicities are all zero or one, multiset operations
714+
# are guaranteed to be equivalent to the corresponding operations
715+
# for regular sets.
716+
# Given counter multisets such as:
717+
# cp = Counter(a=1, b=0, c=1)
718+
# cq = Counter(c=1, d=0, e=1)
719+
# The corresponding regular sets would be:
720+
# sp = {'a', 'c'}
721+
# sq = {'c', 'e'}
722+
# All of the following relations would hold:
723+
# set(cp + cq) == sp | sq
724+
# set(cp - cq) == sp - sq
725+
# set(cp | cq) == sp | sq
726+
# set(cp & cq) == sp & sq
727+
# cp.isequal(cq) == (sp == sq)
728+
# cp.issubset(cq) == sp.issubset(sq)
729+
# cp.issuperset(cq) == sp.issuperset(sq)
730+
# cp.isdisjoint(cq) == sp.isdisjoint(sq)
719731

720732
def __add__(self, other):
721733
'''Add counts from two counters.
@@ -874,6 +886,92 @@ def __iand__(self, other):
874886
self[elem] = other_count
875887
return self._keep_positive()
876888

889+
def isequal(self, other):
890+
''' Test whether counts agree exactly.
891+
892+
Negative or missing counts are treated as zero.
893+
894+
This is different than the inherited __eq__() method which
895+
treats negative or missing counts as distinct from zero:
896+
897+
>>> Counter(a=1, b=0).isequal(Counter(a=1))
898+
True
899+
>>> Counter(a=1, b=0) == Counter(a=1)
900+
False
901+
902+
Logically equivalent to: +self == +other
903+
'''
904+
if not isinstance(other, Counter):
905+
other = Counter(other)
906+
for elem in set(self) | set(other):
907+
left = self[elem]
908+
right = other[elem]
909+
if left == right:
910+
continue
911+
if left < 0:
912+
left = 0
913+
if right < 0:
914+
right = 0
915+
if left != right:
916+
return False
917+
return True
918+
919+
def issubset(self, other):
920+
'''True if the counts in self are less than or equal to those in other.
921+
922+
Negative or missing counts are treated as zero.
923+
924+
Logically equivalent to: not self - (+other)
925+
'''
926+
if not isinstance(other, Counter):
927+
other = Counter(other)
928+
for elem, count in self.items():
929+
other_count = other[elem]
930+
if other_count < 0:
931+
other_count = 0
932+
if count > other_count:
933+
return False
934+
return True
935+
936+
def issuperset(self, other):
937+
'''True if the counts in self are greater than or equal to those in other.
938+
939+
Negative or missing counts are treated as zero.
940+
941+
Logically equivalent to: not other - (+self)
942+
'''
943+
if not isinstance(other, Counter):
944+
other = Counter(other)
945+
return other.issubset(self)
946+
947+
def isdisjoint(self, other):
948+
'''True if none of the elements in self overlap with those in other.
949+
950+
Negative or missing counts are ignored.
951+
952+
Logically equivalent to: not (+self) & (+other)
953+
'''
954+
if not isinstance(other, Counter):
955+
other = Counter(other)
956+
for elem, count in self.items():
957+
if count > 0 and other[elem] > 0:
958+
return False
959+
return True
960+
961+
# Rich comparison operators for multiset subset and superset tests
962+
# have been deliberately omitted due to semantic conflicts with the
963+
# existing inherited dict equality method. Subset and superset
964+
# semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
965+
# however, that would not be the case for p=Counter(a=1, b=0)
966+
# and q=Counter(a=1) where the dictionaries are not equal.
967+
968+
def _omitted(self, other):
969+
raise TypeError(
970+
'Rich comparison operators have been deliberately omitted. '
971+
'Use the isequal(), issubset(), and issuperset() methods instead.')
972+
973+
__lt__ = __le__ = __gt__ = __ge__ = __lt__ = _omitted
974+
877975

878976
########################################################################
879977
### ChainMap

Lib/test/test_collections.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import operator
88
import pickle
99
from random import choice, randrange
10+
from itertools import product, chain, combinations
1011
import string
1112
import sys
1213
from test import support
@@ -2219,6 +2220,64 @@ def test_helper_function(self):
22192220
self.assertTrue(c.called)
22202221
self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })
22212222

2223+
def test_multiset_operations_equivalent_to_set_operations(self):
2224+
# When the multiplicities are all zero or one, multiset operations
2225+
# are guaranteed to be equivalent to the corresponding operations
2226+
# for regular sets.
2227+
s = list(product(('a', 'b', 'c'), range(2)))
2228+
powerset = chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
2229+
counters = [Counter(dict(groups)) for groups in powerset]
2230+
for cp, cq in product(counters, repeat=2):
2231+
sp = set(cp.elements())
2232+
sq = set(cq.elements())
2233+
self.assertEqual(set(cp + cq), sp | sq)
2234+
self.assertEqual(set(cp - cq), sp - sq)
2235+
self.assertEqual(set(cp | cq), sp | sq)
2236+
self.assertEqual(set(cp & cq), sp & sq)
2237+
self.assertEqual(cp.isequal(cq), sp == sq)
2238+
self.assertEqual(cp.issubset(cq), sp.issubset(sq))
2239+
self.assertEqual(cp.issuperset(cq), sp.issuperset(sq))
2240+
self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq))
2241+
2242+
def test_multiset_equal(self):
2243+
self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa'))
2244+
self.assertFalse(Counter(a=3, b=2).isequal('babab'))
2245+
2246+
def test_multiset_subset(self):
2247+
self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa'))
2248+
self.assertFalse(Counter(a=3, b=2).issubset('babab'))
2249+
2250+
def test_multiset_superset(self):
2251+
self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab'))
2252+
self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd'))
2253+
2254+
def test_multiset_disjoint(self):
2255+
self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde'))
2256+
self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd'))
2257+
2258+
def test_multiset_predicates_with_negative_counts(self):
2259+
# Multiset predicates run on the output of the elements() method,
2260+
# meaning that zero counts and negative counts are ignored.
2261+
# The tests below confirm that we get that same results as the
2262+
# tests above, even after a negative count has been included
2263+
# in either *self* or *other*.
2264+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa'))
2265+
self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab'))
2266+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa'))
2267+
self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab'))
2268+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab'))
2269+
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd'))
2270+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde'))
2271+
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd'))
2272+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1)))
2273+
self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1)))
2274+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1)))
2275+
self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1)))
2276+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1)))
2277+
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1)))
2278+
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
2279+
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1)))
2280+
22222281

22232282
################################################################################
22242283
### Run tests
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add multiset comparison methods to collections.Counter(): isequal(),
2+
issubset(), issuperset(), and isdisjoint().

0 commit comments

Comments
 (0)
0