10000 Str Categorical Axis Support by story645 · Pull Request #6689 · matplotlib/matplotlib · GitHub
[go: up one dir, main page]

Skip to content

Str Categorical Axis Support #6689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 8, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Basic support for plotting lists of strings/categorical data. Support…
… for updating ticks/animation in progress/buggy, especially for scatter.
  • Loading branch information
story645 authored and tacaswell committed Jul 8, 2016
commit b1848420bf80cb40211f37fb32f3a2d07e9b5f7f
1 change: 1 addition & 0 deletions lib/matplotlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,7 @@ def _jupyter_nbextension_paths():
'matplotlib.tests.test_backend_svg',
'matplotlib.tests.test_basic',
'matplotlib.tests.test_bbox_tight',
'matplotlib.tests.test_category',
'matplotlib.tests.test_cbook',
'matplotlib.tests.test_coding_standards',
'matplotlib.tests.test_collections',
Expand Down
1 change: 1 addition & 0 deletions lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import matplotlib.collections as mcoll
import matplotlib.colors as mcolors
import matplotlib.contour as mcontour
import matplotlib.category as _ # <-registers a category unit converter
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo in comment.

import matplotlib.dates as _ # <-registers a date unit converter
from matplotlib import docstring
import matplotlib.image as mimage
Expand Down
12 changes: 12 additions & 0 deletions lib/matplotlib/axis.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ def __init__(self, axes, pickradius=15):
self.offsetText = self._get_offset_text()
self.majorTicks = []
self.minorTicks = []
self.unit_data = []
self.pickradius = pickradius

# Initialize here for testing; later add API
Expand Down Expand Up @@ -712,6 +713,17 @@ def _set_scale(self, value, **kwargs):
def limit_range_for_scale(self, vmin, vmax):
# Delegates to the scale object stored in self._scale, passing along
# vmin, vmax and whatever self.get_minpos() returns as the third argument.
return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos())

@property
def unit_data(self):
"""Holds data that a ConversionInterface subclass relies on
to convert between labels and indexes.

Returns the object stored in ``self._unit_data`` unchanged.
"""
return self._unit_data

@unit_data.setter
def unit_data(self, data):
# Stores the given object as-is in self._unit_data; no validation or
# copying happens here.
self._unit_data = data

def get_children(self):
children = [self.label, self.offsetText]
majorticks = self.get_major_ticks()
Expand Down
117 changes: 117 additions & 0 deletions lib/matplotlib/category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
catch all for categorical functions
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)

import six
import numpy as np

import matplotlib.units as units
import matplotlib.ticker as ticker


class StrCategoryConverter(units.ConversionInterface):
    """Unit converter that encodes string categories as numbers.

    The label-to-location mapping is read from ``axis.unit_data``, a
    sequence of ``(label, location)`` pairs that `default_units` refreshes
    by calling `map_categories`.
    """

    @staticmethod
    def convert(value, unit, axis):
        """Encode ``value`` as numbers using the ``axis.unit_data`` map.

        Parameters
        ----------
        value : str or array-like
            A single label, or a sequence of labels.
        unit : object
            Unused.
        axis : object
            Object whose ``unit_data`` attribute holds the
            ``(label, location)`` pairs.

        Returns
        -------
        The mapped location for a single string (``KeyError`` if the label
        is unknown), otherwise a float array with one code per element.

        Raises
        ------
        ValueError
            If an element of a sequence is neither a known label nor
            parseable as a float.
        """
        if isinstance(value, six.string_types):
            return dict(axis.unit_data)[value]

        labels = np.asarray(value, dtype='str')

        # Write the codes into a separate float array.  Storing them back
        # into the string array would truncate codes wider than the string
        # dtype (10 -> '1' for one-character labels) and would let a freshly
        # written code be matched again by a later label such as '0'.
        codes = np.zeros(labels.shape, dtype='float')
        matched = np.zeros(labels.shape, dtype=bool)
        for label, loc in axis.unit_data:
            hits = labels == label
            codes[hits] = loc
            matched |= hits

        # Elements without a mapping are parsed as plain numbers, so values
        # that are already numeric pass through and anything else raises
        # ValueError, as it did before.
        unmatched = ~matched
        codes[unmatched] = labels[unmatched].astype('float')
        return codes

    @staticmethod
    def axisinfo(unit, axis):
        """Return an ``AxisInfo`` with a fixed tick per known category."""
        if axis.unit_data:
            seq, locs = zip(*axis.unit_data)
        else:
            # zip(*[]) yields nothing to unpack; fall back to no ticks.
            seq, locs = (), ()
        majloc = StrCategoryLocator(locs)
        majfmt = StrCategoryFormatter(seq)
        return units.AxisInfo(majloc=majloc, majfmt=majfmt)

    @staticmethod
    def default_units(data, axis):
        """Update ``axis.unit_data`` with the categories found in ``data``.

        Always returns ``None``; the method is used for its side effect on
        ``axis.unit_data``.
        """
        # the conversion call stack is:
        # default_units->axis_info->convert
        axis.unit_data = map_categories(data, axis.unit_data)
        return None


def map_categories(data, old_map=None, sort=True):
    """Create mapping between unique categorical values and numerical
    identifier.

    Parameters
    ----------
    data : iterable
        Sequence of values; every value is cast to ``str`` before mapping.
    old_map : sequence of (label, location) tuples, optional
        Existing mapping to extend.  Its entries are kept unchanged and the
        passed object is never modified.  ``None`` (the default) or an
        empty sequence starts a fresh mapping.  The default is ``None``
        rather than a list so that no mutable object is shared between
        calls.
    sort : bool, optional
        If true (default), new labels are numbered in sorted order,
        otherwise in order of first appearance in ``data``.

    Returns
    -------
    list of tuple
        ``[(label, ticklocation), ...]``: the old entries, then any newly
        seen special labels, then the remaining new labels.
    """
    # code typical missing data in the negative range because
    # everything else will always have a non-negative encoding
    spdict = {'nan': -1, 'inf': -2, '-inf': -3}

    # list(...) instead of .copy(): works for any sequence and on Python 2,
    # where lists have no copy() method.
    category_map = list(old_map) if old_map else []

    if category_map:
        olabs, okeys = zip(*category_map)
        # Clamp at zero so that an old map holding only special (negative)
        # codes cannot hand out a negative code that collides with them.
        svalue = max(max(okeys) + 1, 0)
    else:
        olabs = ()
        svalue = 0

    # Collect labels not yet mapped, in order of first appearance, so the
    # result is deterministic regardless of set iteration order.
    seen = set(olabs)
    fresh = []
    for label in (str(d) for d in data):
        if label not in seen:
            seen.add(label)
            fresh.append(label)

    # Special labels get their reserved negative codes.
    category_map.extend((lab, spdict[lab]) for lab in fresh if lab in spdict)

    new_labs = [lab for lab in fresh if lab not in spdict]
    if sort:
        new_labs.sort()

    # Ordinary labels are numbered consecutively from svalue.
    category_map.extend(zip(new_labs, range(svalue, svalue + len(new_labs))))
    return category_map


class StrCategoryLocator(ticker.FixedLocator):
    """Fixed locator placing one tick at each category location."""

    def __init__(self, locs):
        # No limit on the number of ticks is passed (nbins is None).
        parent = super(StrCategoryLocator, self)
        parent.__init__(locs, None)


class StrCategoryFormatter(ticker.FixedFormatter):
    """Fixed formatter labelling ticks with the category names in ``seq``."""

    def __init__(self, seq):
        parent = super(StrCategoryFormatter, self)
        parent.__init__(seq)


# Connects the converter to matplotlib's unit registry.
# Each registered type gets its own converter instance.
_registered_types = [bytearray, str]
if six.PY3:
    _registered_types.append(bytes)
elif six.PY2:
    _registered_types.append(unicode)

for _string_type in _registered_types:
    units.registry[_string_type] = StrCategoryConverter()

# Do not leave the loop helpers behind in the module namespace.
del _registered_types, _string_type
Binary file not shown.
Binary file not shown.
Loading
0