Initial commit
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
"""
|
||||
Module that allows plotting of string "category" data. i.e.
|
||||
``plot(['d', 'f', 'a'],[1, 2, 3])`` will plot three points with x-axis
|
||||
values of 'd', 'f', 'a'.
|
||||
|
||||
See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
|
||||
example.
|
||||
|
||||
The module uses Matplotlib's `matplotlib.units` mechanism to convert from
|
||||
strings to integers, provides a tick locator and formatter, and the
|
||||
class:`.UnitData` that creates and stores the string-to-integer mapping.
|
||||
"""
|
||||
|
||||
from collections import OrderedDict
|
||||
import dateutil.parser
|
||||
import itertools
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
import matplotlib.cbook as cbook
|
||||
import matplotlib.units as units
|
||||
import matplotlib.ticker as ticker
|
||||
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StrCategoryConverter(units.ConversionInterface):
|
||||
@staticmethod
|
||||
def convert(value, unit, axis):
|
||||
"""Convert strings in value to floats using
|
||||
mapping information store in the unit object.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
value : string or iterable
|
||||
Value or list of values to be converted.
|
||||
unit : `.UnitData`
|
||||
An object mapping strings to integers.
|
||||
axis : `~matplotlib.axis.Axis`
|
||||
axis on which the converted value is plotted.
|
||||
|
||||
.. note:: *axis* is unused.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mapped_value : float or ndarray[float]
|
||||
"""
|
||||
if unit is None:
|
||||
raise ValueError(
|
||||
'Missing category information for StrCategoryConverter; '
|
||||
'this might be caused by unintendedly mixing categorical and '
|
||||
'numeric data')
|
||||
|
||||
# dtype = object preserves numerical pass throughs
|
||||
values = np.atleast_1d(np.array(value, dtype=object))
|
||||
|
||||
# pass through sequence of non binary numbers
|
||||
if all((units.ConversionInterface.is_numlike(v) and
|
||||
not isinstance(v, (str, bytes))) for v in values):
|
||||
return np.asarray(values, dtype=float)
|
||||
|
||||
# force an update so it also does type checking
|
||||
unit.update(values)
|
||||
|
||||
str2idx = np.vectorize(unit._mapping.__getitem__,
|
||||
otypes=[float])
|
||||
|
||||
mapped_value = str2idx(values)
|
||||
return mapped_value
|
||||
|
||||
@staticmethod
|
||||
def axisinfo(unit, axis):
|
||||
"""Sets the default axis ticks and labels
|
||||
|
||||
Parameters
|
||||
----------
|
||||
unit : `.UnitData`
|
||||
object string unit information for value
|
||||
axis : `~matplotlib.Axis.axis`
|
||||
axis for which information is being set
|
||||
|
||||
Returns
|
||||
-------
|
||||
axisinfo : `~matplotlib.units.AxisInfo`
|
||||
Information to support default tick labeling
|
||||
|
||||
.. note: axis is not used
|
||||
"""
|
||||
# locator and formatter take mapping dict because
|
||||
# args need to be pass by reference for updates
|
||||
majloc = StrCategoryLocator(unit._mapping)
|
||||
majfmt = StrCategoryFormatter(unit._mapping)
|
||||
return units.AxisInfo(majloc=majloc, majfmt=majfmt)
|
||||
|
||||
@staticmethod
|
||||
def default_units(data, axis):
|
||||
"""Sets and updates the :class:`~matplotlib.Axis.axis` units.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : string or iterable of strings
|
||||
axis : `~matplotlib.Axis.axis`
|
||||
axis on which the data is plotted
|
||||
|
||||
Returns
|
||||
-------
|
||||
class : `.UnitData`
|
||||
object storing string to integer mapping
|
||||
"""
|
||||
# the conversion call stack is supposed to be
|
||||
# default_units->axis_info->convert
|
||||
if axis.units is None:
|
||||
axis.set_units(UnitData(data))
|
||||
else:
|
||||
axis.units.update(data)
|
||||
return axis.units
|
||||
|
||||
|
||||
class StrCategoryLocator(ticker.Locator):
|
||||
"""tick at every integer mapping of the string data"""
|
||||
def __init__(self, units_mapping):
|
||||
"""
|
||||
Parameters
|
||||
-----------
|
||||
units_mapping : Dict[str, int]
|
||||
string:integer mapping
|
||||
"""
|
||||
self._units = units_mapping
|
||||
|
||||
def __call__(self):
|
||||
return list(self._units.values())
|
||||
|
||||
def tick_values(self, vmin, vmax):
|
||||
return self()
|
||||
|
||||
|
||||
class StrCategoryFormatter(ticker.Formatter):
|
||||
"""String representation of the data at every tick"""
|
||||
def __init__(self, units_mapping):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
units_mapping : Dict[Str, int]
|
||||
string:integer mapping
|
||||
"""
|
||||
self._units = units_mapping
|
||||
|
||||
def __call__(self, x, pos=None):
|
||||
if pos is None:
|
||||
return ""
|
||||
r_mapping = {v: StrCategoryFormatter._text(k)
|
||||
for k, v in self._units.items()}
|
||||
return r_mapping.get(int(np.round(x)), '')
|
||||
|
||||
@staticmethod
|
||||
def _text(value):
|
||||
"""Converts text values into utf-8 or ascii strings.
|
||||
"""
|
||||
if isinstance(value, bytes):
|
||||
value = value.decode(encoding='utf-8')
|
||||
elif not isinstance(value, str):
|
||||
value = str(value)
|
||||
return value
|
||||
|
||||
|
||||
class UnitData(object):
|
||||
def __init__(self, data=None):
|
||||
"""
|
||||
Create mapping between unique categorical values and integer ids.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : iterable
|
||||
sequence of string values
|
||||
"""
|
||||
self._mapping = OrderedDict()
|
||||
self._counter = itertools.count()
|
||||
if data is not None:
|
||||
self.update(data)
|
||||
|
||||
@staticmethod
|
||||
def _str_is_convertible(val):
|
||||
"""
|
||||
Helper method to see if a string can be cast to float or
|
||||
parsed as date.
|
||||
"""
|
||||
try:
|
||||
float(val)
|
||||
except ValueError:
|
||||
try:
|
||||
dateutil.parser.parse(val)
|
||||
except ValueError:
|
||||
return False
|
||||
return True
|
||||
|
||||
def update(self, data):
|
||||
"""Maps new values to integer identifiers.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : iterable
|
||||
sequence of string values
|
||||
|
||||
Raises
|
||||
------
|
||||
TypeError
|
||||
If the value in data is not a string, unicode, bytes type
|
||||
"""
|
||||
data = np.atleast_1d(np.array(data, dtype=object))
|
||||
|
||||
# check if convertible to number:
|
||||
convertible = True
|
||||
for val in OrderedDict.fromkeys(data):
|
||||
# OrderedDict just iterates over unique values in data.
|
||||
if not isinstance(val, (str, bytes)):
|
||||
raise TypeError("{val!r} is not a string".format(val=val))
|
||||
if convertible:
|
||||
# this will only be called so long as convertible is True.
|
||||
convertible = self._str_is_convertible(val)
|
||||
if val not in self._mapping:
|
||||
self._mapping[val] = next(self._counter)
|
||||
if convertible:
|
||||
_log.info('Using categorical units to plot a list of strings '
|
||||
'that are all parsable as floats or dates. If these '
|
||||
'strings should be plotted as numbers, cast to the '
|
||||
'appropriate data type before plotting.')
|
||||
|
||||
|
||||
# Register the converter with Matplotlib's unit framework
|
||||
units.registry[str] = StrCategoryConverter()
|
||||
units.registry[np.str_] = StrCategoryConverter()
|
||||
units.registry[bytes] = StrCategoryConverter()
|
||||
units.registry[np.bytes_] = StrCategoryConverter()
|
||||
Reference in New Issue
Block a user