Source code for plastid.plotting.plots

#!/usr/bin/env python
"""Some common plots that are not directly implemented in :mod:`matplotlib`,
as well as some specific plots used by :mod:`plastid`. Demos of these appear
in :doc:`/examples/z_plotting`.

General plots
-------------
    :func:`stacked_bar`
        Create a stacked bar graph

    :func:`kde_plot`
        Plot a kernel density estimate (continuous histogram) of data

    :func:`scatterhist_x`, :func:`scatterhist_y`, and :func:`scatterhist_xy`
        Create scatter plots with kernel density estimates of the marginal 
        distributions along side them

    :func:`profile_heatmap`
        Plot a heatmap, with a columnwise median (or other profile summarizing
        the heatmap) in an upper panel above it, with aligned coordinates

    :func:`triangle_plot`
        Plot data lying on the plane x + y + z = k (e.g. codon phasing)
        in a homogeneous 2D representation


Plots for genomics
------------------
    :func:`ma_plot`
        Plot fold changes between `x` and `y` (:math:`\log_{2} (y/x)`) as
        a function of the mean of x and y (:math:`0.5*(x+y)`).

    :func:`phase_plot`
        For ribosome profiling. Plot sub-codon phasing of ribosome-protected
        footprints stratified by read length, as well as the fraction of total
        reads represented by each length.

"""
import copy
import numpy
import scipy.stats
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.artist import Artist
from plastid.plotting.colors import lighten, darken, process_black
from plastid.plotting.plotutils import get_fig_axes, split_axes, clean_invalid, get_kde

#==============================================================================
# Default keyword arguments for plots or subplots
# and helper functions
#==============================================================================

# Keyword defaults used as the `scargs` default of the scatterhist functions,
# which forward them to the scatter call on the main panel.
plastid_default_scatter = dict(
    marker="o",
    alpha=0.7,
    facecolor="none",
    s=8,
    rasterized=True,
)
"""Default parameters for scatter plots"""


def get_color_cycle(ax):
    """Get color cycle iterator from multiple versions of matplotlib axes

    Parameters
    ----------
    ax : :class:`matplotlib.axes.Axes`
        Axes whose (private) `_get_lines` helper holds the color cycle


    Returns
    -------
    iterator
        Iterator over colors, passable to matplotlib `color` keyword


    Raises
    ------
    AssertionError
        If no color cycle can be located on `ax`
    """
    lines = getattr(ax, "_get_lines", None)
    if lines is not None:
        if hasattr(lines, "prop_cycler"):
            return (X["color"] for X in lines.prop_cycler)
        if hasattr(lines, "color_cycle"):
            return lines.color_cycle
        # NOTE(review): newer matplotlib releases appear to have dropped
        # `prop_cycler` from this private helper while keeping a
        # `get_next_color()` method -- confirm against supported versions.
        next_color = getattr(lines, "get_next_color", None)
        if callable(next_color):
            # two-argument iter(): call `next_color` on every step; the `None`
            # sentinel is only there to satisfy the signature
            return iter(next_color, None)

    # Previously an axes that had `_get_lines` but no recognized cycle
    # attribute fell through and returned `None`; fail loudly instead.
    raise AssertionError("get_color_cycle: Could not find color cycle")


#==============================================================================
# Stacked bar
#==============================================================================


def stacked_bar(data, axes=None, labels=None, lighten_by=0.1, cmap=None, **kwargs):
    """Create a stacked bar graph
    
    Parameters
    ----------
    data : :class:`numpy.ndarray`
        Array of data, in which each row is a stack, each column a value in that stack.

    axes : :class:`matplotlib.axes.Axes` or `None`, optional
        Axes in which to place plot. If `None`, a new figure is generated.
        (Default: `None`)
        
    labels : list, optional
        Labels for each stack. If `None`, stacks are labeled sequentially by number.
        (Default: `None`)
    
    lighten_by : float, optional
        Amount by which to lighten sequential blocks in each stack. (Default: 0.10)
    
    cmap : :class:`matplotlib.colors.Colormap`, optional
        Colormap from which to generate bar colors, sampled at one evenly
        spaced point per stack. If supplied, will override any `color`
        attribute in `**kwargs`. (Default: `None`)
        
    **kwargs : keyword arguments
        Other keyword arguments to pass to :meth:`matplotlib.axes.Axes.bar`

    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Parent figure of axes
    
    :class:`matplotlib.axes.Axes`
        Axes containing plot
    """
    fig, ax = get_fig_axes(axes)
    rows, cols = data.shape
    if labels is None:
        labels = range(rows)

    if cmap is not None:
        # one color per stack (was a hard-coded 10 samples regardless of `rows`)
        kwargs["color"] = cmap(numpy.linspace(0, 1.0, num=rows))
    elif kwargs.get("color", None) is None:
        color_cycle = get_color_cycle(ax)
        kwargs["color"] = [next(color_cycle) for _ in range(rows)]

    kwargs.setdefault("align", "center")
    kwargs.setdefault("width", 0.8)

    x = numpy.arange(rows) + 0.5
    ax.xaxis.set_ticks(x)
    ax.xaxis.set_ticklabels(labels)

    bottoms = numpy.zeros(rows)
    for i in range(cols):
        if i > 0:
            # lightening compounds: each block is lighter than the one below it
            kwargs["color"] = lighten(kwargs["color"], amt=lighten_by)

        heights = data[:, i]
        # draw on the requested axes, not on whatever pyplot considers current
        ax.bar(x, heights, bottom=bottoms, **kwargs)
        bottoms = bottoms + heights

    ax.set_xlim(-0.5, rows + 0.5)

    return fig, ax


#==============================================================================
# Kernel density estimate
#==============================================================================

def kde_plot(
        data,
        axes      = None,
        color     = None,
        label     = None,
        alpha     = 0.7,
        vert      = False,
        log       = False,
        base      = 10,
        points    = 500,
        bw_method = "scott",
        rescale   = False,
        zorder    = None,
        fill      = True
): # yapf: disable
    """Plot a kernel density estimate of `data` on `axes`.

    Parameters
    ----------
    data : :class:`numpy.ndarray`
        Array of data

    axes : :class:`matplotlib.axes.Axes` or `None`, optional
        Axes in which to place plot. If `None`, a new figure is generated.
        (Default: `None`)
        
    color : matplotlib colorspec, optional
        Color to use for plotting (Default: use next in matplotlibrc)

    label : str, optional
        Name of data series (used for legend; default: `None`)

    alpha : float, optional
        Amount of alpha transparency to use (Default: 0.7)

    vert : bool, optional
        If true, plot kde vertically (Default: `False`)

    log : bool, optional
        If `True`, `data` is log-transformed before the kde is estimated.
        Data are converted back to non-log space afterwards, and the data
        axis of the plot is set to a log scale. (Default: `False`)

    base : 2, 10, or :obj:`numpy.e`, optional
        If `log` is `True`, this serves as the base of the log space.
        If `log` is `False`, this is ignored. (Default: 10)

    points : int
        Number of points over which to evaluate kde. (Default: 500)

    bw_method : str
        Bandwidth estimation method. See documentation for
        :obj:`scipy.stats.gaussian_kde`. (Default: "scott")

    rescale : bool, optional
        If true, divide the estimated densities by their maximum, so that
        the curve peaks at 1. (Default: `False`)

    zorder : number or None, optional
        If not `None`, z-order applied to both the filled area and the
        outline. (Default: `None`, use matplotlib defaults)

    fill : bool, optional
        If true, shade the area between the curve and zero. (Default: `True`)


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Parent figure of axes
    
    :class:`matplotlib.axes.Axes`
        Axes containing plot
    """
    fig, axes = get_fig_axes(axes)

    if color is None:
        color = next(get_color_cycle(axes))

    a, b = get_kde(data, log=log, base=base, points=points, bw_method=bw_method)

    if rescale:
        b /= b.max()

    # `zorder` was previously accepted but silently ignored; only forward it
    # when given so matplotlib's per-artist defaults still apply otherwise
    layer = {} if zorder is None else {"zorder": zorder}

    fbargs = {"alpha": alpha, "facecolor": lighten(color), "edgecolor": color}
    fbargs.update(layer)
    if label is not None:
        fbargs["label"] = label

    if vert:
        if fill:
            axes.fill_betweenx(a, b, 0, **fbargs)

        # plot twice because in some versions of matplotlib, 
        # fill_between doesn't add to legend
        # TODO: remove in future when mpl's behavior changes
        axes.plot(b, a, color=color, alpha=alpha, label=label, **layer)
        if log:
            axes.semilogy()
    else:
        if fill:
            axes.fill_between(a, b, 0, **fbargs)
        axes.plot(a, b, color=color, alpha=alpha, label=label, **layer)
        if log:
            axes.semilogx()

    return fig, axes


#==============================================================================
# Triangle plot
#==============================================================================

rotate = numpy.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]])

_triA = numpy.array([[1, 0, 0], [-1, -1, 1]])
_triT = numpy.array([[0.5, 1], [0.5 * (3**0.5), 0]])
_triTA = _triT.dot(_triA)

_triverts = numpy.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])


def trianglize(data):
    """Convert points from triangular space to Cartesian space for plotting.
    Called internally by :func:`triangle_plot`.

    Parameters
    ----------
    data : :class:`numpy.ndarray` or list
        Mx2 or Mx3 list or array of points in triangular space, where
        the first column is the first coordinate, the second column
        the second, and the third, the third.
    
    Returns
    -------
    :class:`numpy.ndarray`
        Corresponding Mx2 array of points in Cartesian space, for plotting
        on a standard axis
    """
    # lists are documented as valid input, but `.shape` and fancy indexing
    # below require an array
    data = numpy.asarray(data)

    if data.shape[1] == 2:
        # append a column of ones so the affine map `_triTA` can be applied
        homogeneous = numpy.hstack([data, numpy.ones((data.shape[0], 1))])
        return _triTA.dot(homogeneous.T).T

    # three coordinates supplied: only the first and third are needed
    return _triT.dot(data[:, (0, 2)].T).T


def triangle_plot(
        data,
        axes=None,
        fn="scatter",
        vertex_labels=None,
        grid=None,
        clip=True,
        do_setup=True,
        **kwargs
):
    """Plot data lying in a plane x + y + z = k in a homogeneous triangular space.

    Parameters
    ----------
    data : :class:`numpy.ndarray`
        Mx2 or Mx3 list or array of points in triangular space, where
        the first column is the first coordinate, the second column
        the second, and the third, the third.

    axes : :class:`matplotlib.axes.Axes` or `None`, optional
        Axes in which to place plot. If `None`, a new figure is generated.
        (Default: `None`)
       
    fn : str, optional
        Name of plotting function. Must correspond to an attribute of a
        :class:`matplotlib.axes.Axes` that is able to take the x and y
        coordinates of the points in Cartesian space as its first two
        positional arguments (e.g. 'plot', 'scatter', 'hexbin';
        Default: 'scatter').

    vertex_labels : list or None, optional
        Three labels, one per vertex. If `None`, vertices aren't labeled.
        (Default: `None`)

    grid : :class:`numpy.ndarray` or None, optional
        If not `None`, draw gridlines at intervals specified in `grid`.
        Values below 1/3 (center of triangle) are skipped.

    clip : bool, optional
        If `True` clipping masks corresponding to the triangle boundaries
        will be applied to all plot elements (Default: `True`)
        
    do_setup : bool, optional
        If `True`, the plot area will be prepared. A triangle will be drawn,
        gridlines drawn, et c. Specify `False` if plotting a second dataset
        on top of an already-prepared axes (Default: `True`)

    **kwargs : keyword arguments
        Other keyword arguments to pass to function specified by `fn`.

    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Parent figure of axes
    
    :class:`matplotlib.axes.Axes`
        Axes containing plot

    """
    fig, ax = get_fig_axes(axes)
    mplrc = matplotlib.rcParams

    # The outline is always built, because it also serves as the clip path
    # below. Previously it only existed when `do_setup` was true, so
    # `do_setup=False` combined with `clip=True` failed on an unbound name.
    triverts = trianglize(_triverts)
    tripatch = matplotlib.patches.Polygon(
        triverts,
        closed=True,
        facecolor=mplrc["axes.facecolor"],
        edgecolor=mplrc["axes.edgecolor"],
        linewidth=mplrc["axes.linewidth"],
        zorder=-10
    )

    if do_setup:
        ax.add_patch(tripatch)

        # format axes
        ax.set_xlim((0, 1))
        # NOTE(review): this reads the untransformed vertex table, so the upper
        # limit is 1.0 rather than the Cartesian height of the triangle --
        # confirm the headroom is intended
        ax.set_ylim((0, _triverts[:, 1].max()))
        ax.set_frame_on(False)
        ax.set_xticks([])
        ax.set_yticks([])

        # label vertices, nudged away from the corners by a few points
        if vertex_labels is not None:
            l1, l2, l3 = vertex_labels

            tkwargs = {"fig": fig, "units": "points"}
            p1trans = matplotlib.transforms.offset_copy(ax.transData, x=0, y=8, **tkwargs)
            p2trans = matplotlib.transforms.offset_copy(ax.transData, x=-10, y=-12, **tkwargs)
            p3trans = matplotlib.transforms.offset_copy(ax.transData, x=10, y=-12, **tkwargs)
            ax.text(triverts[0, 0], triverts[0, 1], l1, transform=p1trans)
            ax.text(triverts[1, 0], triverts[1, 1], l2, transform=p2trans)
            ax.text(triverts[2, 0], triverts[2, 1], l3, transform=p3trans)

        # add gridlines, styled from the `grid.*` entries of matplotlibrc
        if grid is not None:
            grid_kwargs = {
                K.replace("grid.", ""): V
                for (K, V) in mplrc.items() if K.startswith("grid")
            }
            grid = numpy.array(grid)
            remainders = (1.0 - grid) / 2
            for i, r in zip(grid, remainders):
                if i >= 1.0 / 3:
                    # corners of the grid triangle: (i, r, r) and its cyclic
                    # permutations, ending back on the first point to close it
                    points = [numpy.array([i, r, r])]
                    for _ in range(3):
                        points.append(rotate.dot(points[-1]))

                    points = numpy.array(points)
                    points = trianglize(points[:, [0, 2]])

                    myline = matplotlib.lines.Line2D(points[:, 0], points[:, 1], **grid_kwargs)
                    ax.add_line(myline)
    else:
        # the outline is not drawn here; give it the data transform explicitly
        # so that it can still be used to clip artists on `ax`
        tripatch.set_transform(ax.transData)

    # scale data
    data = trianglize(data)

    # plot data
    artists = []
    fn = getattr(ax, fn)
    res = fn(*zip(*data), **kwargs)
    if isinstance(res, Artist):
        artists.append(res)
    elif isinstance(res, list):
        artists.extend([X for X in res if isinstance(X, Artist)])

    # clip
    if clip:
        for artist in artists:
            artist.set_clip_path(tripatch)
            artist.set_clip_on(True)

    return fig, ax


#==============================================================================
# Heatmaps with profiles on top
#==============================================================================


def sort_max_position(data):
    """Return the row order that sorts `data` by the column at which each
    row reaches its maximum (ignoring `nan`)

    Parameters
    ----------
    data : :class:`numpy.ndarray`
        Two-dimensional array; each row is ranked by the position of its peak

    Returns
    -------
    :class:`numpy.ndarray`
        Indices of rows that sort data by max position
    """
    row_peaks = numpy.nanmax(data, 1)

    # argmax() of the boolean match gives the first column where the row
    # equals its peak; kept as floats, like the original accumulator array
    peak_columns = numpy.array(
        [(row == peak).argmax() for row, peak in zip(data, row_peaks)],
        dtype=float,
    )

    return numpy.argsort(peak_columns)


_heatmap_defaults = {
    "aspect": "auto",
    "origin": "upper",
    "interpolation": "none",
}

def profile_heatmap(
        data,
        profile   = None,
        x         = None,
        axes      = None,
        sort_fn   = sort_max_position,
        cmap      = None,
        nancolor  = "#666666",
        im_args   = {},
        plot_args = {}
): # yapf: disable
    """Create a dual-paned plot in which `profile` is displayed in a top
    panel, above a heatmap showing the intensities of each row of `data`,
    optionally sorted top-to-bottom by `sort_fn`.

    Parameters
    ----------
    data : :class:`numpy.ndarray`
        Array of data, in which each row is an individual aligned vector of data
        for region of interest, and each column a position in that vector

    profile : :class:`numpy.ndarray` or None
        Reduced profile of data, often a column-wise median. If not
        supplied, the column-wise median (ignoring `nan`) is calculated.

    x : :class:`numpy.ndarray` or None
        Array of values for X-axis. If `None`, column indices of `data`
        are used.

    axes : :class:`matplotlib.axes.Axes` or `None`, optional
        Axes in which to place plot. If `None`, a new figure is generated.
        (Default: `None`)
       
    sort_fn : function or None, optional
        Sort rows in `data` by this function before plotting. Function must
        return a :class:`numpy.ndarray` of indices corresponding to rows in `data`.
        If `None`, rows are left in their given order.
        (Default: sort by ascending argmax of each row)

    cmap : :class:`~matplotlib.colors.Colormap`, optional
        Colormap to use in heatmap. If not `None`, overrides any value
        in `im_args`. The colormap is copied before `nancolor` is applied,
        so the object passed in is not modified. (Default: `None`)

    nancolor : str or matplotlib colorspec
        Color used for plotting `nan` and other illegal or masked values

    im_args : dict
        Keyword arguments to pass to :func:`matplotlib.pyplot.imshow`.
        The dictionary is copied, not modified.

    plot_args : dict
        Keyword arguments to pass to :func:`matplotlib.pyplot.plot`
        for plotting metagene average


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Parent figure of axes
    
    dict
        Dictionary of :class:`matplotlib.axes.Axes`. "top" refers to the 
        panel containing the summary profile. "main" refers to the heatmap
        of individual values
    """
    fig, ax = get_fig_axes(axes)
    axes = split_axes(ax, top_height=0.2)

    if sort_fn is None:
        sort_indices = numpy.arange(data.shape[0])
    else:
        sort_indices = sort_fn(data)

    if x is None:
        x = numpy.arange(0, data.shape[1])
    else:
        x = numpy.asarray(x)

    if profile is None:
        profile = numpy.nanmedian(data, axis=0)

    # work on a private copy; the `{}` defaults in the signature are shared
    # between calls and must never be mutated
    im_args = copy.deepcopy(im_args)

    # populate with defaults
    for k, v in _heatmap_defaults.items():
        if k not in im_args:
            im_args[k] = v

    if "extent" not in im_args:
        im_args["extent"] = [x.min(), x.max(), 0, data.shape[0]]
    if "vmin" not in im_args:
        im_args["vmin"] = numpy.nanmin(data)
    if "vmax" not in im_args:
        im_args["vmax"] = numpy.nanmax(data)

    if cmap is None:
        # resolves a colormap name, passes an instance through, and falls back
        # to the rc default when nothing was requested
        cmap = plt.get_cmap(im_args.get("cmap"))

    # Copy before customizing so that neither the caller's colormap nor a
    # registered one is altered, then hand the customized copy to imshow();
    # otherwise `nancolor` may never reach the image.
    cmap = copy.deepcopy(cmap)
    cmap.set_bad(nancolor, 1.0)
    im_args["cmap"] = cmap

    # ignore nan (e.g. from all-nan columns) when choosing the y range
    profile_max = numpy.nanmax(profile)

    axes["top"].plot(x, profile, **plot_args)
    axes["top"].set_ylim(0, profile_max)
    axes["top"].set_xlim(x.min(), x.max())
    axes["top"].set_yticks([0, profile_max])
    axes["top"].xaxis.tick_bottom()
    axes["top"].grid(True, which="both")

    axes["main"].xaxis.tick_bottom()
    axes["main"].imshow(data[sort_indices, :], **im_args)

    return fig, axes


#==============================================================================
# Scatter plots with marginal distributions
#==============================================================================

def _scatterhist_help(
        axes          = None,
        top_height    = 0,
        left_width    = 0,
        right_width   = 0,
        bottom_height = 0,
): # yapf: disable
    """Create a scatter plot with the marginal distribution for `x`
    plotted in a separate pain as a kernel density estimate.

    Parameters
    ----------
    color : matplotlib colorspec, or None, optional
        Color to plot data series

    axes : :class:`matplotlib.axes.Axes`, dict, or `None`, optional
        If a :class:`matplotlib.axes.Axes`, an axes in which to place plot.
        This axes will be split into relevant panels.

        If a dict, this is expected to be a group of pre-split axes.

        If `None`, a new figure is generated, and axes are split.
        (Default: `None`)
      
    top_height, left_width, right_width, bottom_height : float, optional
        If not `None`, a panel on the corresponding side of the `ax` will
        be created, using whatever fraction is specified (e.g. 0.1 to use
        10% of total height).    


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Parent figure of axes
    
    dict of :class:`matplotlib.axes.Axes`
        axes containing plot space
    """
    if axes is None:
        fig = plt.figure()
        axes = plt.gca()
    if isinstance(axes, matplotlib.axes.Axes):
        fig = axes.figure
        axes = split_axes(
            axes,
            top_height=top_height,
            left_width=left_width,
            right_width=right_width,
            bottom_height=bottom_height
        )
    elif isinstance(axes, dict):
        fig = axes["main"].figure
        if left_width > 0:
            assert "left" in axes
        if right_width > 0:
            assert "right" in axes
        if bottom_height > 0:
            assert "bottom" in axes
        if top_height > 0:
            assert "top" in axes

    return fig, axes

def scatterhist_x(
        x,
        y,
        color        = None,
        axes         = None,
        label        = None,
        top_height   = 0.2,
        mask_invalid = True,
        log          = "",
        min_x        = -numpy.inf,
        min_y        = -numpy.inf,
        max_x        = numpy.inf,
        max_y        = numpy.inf,
        scargs       = plastid_default_scatter,
        bw_method    = "scott",
        kdalpha      = 0.7
): # yapf: disable
    """Produce a scatter plot with a kernel density estimate of the marginal `x` distribution

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Pair arrays or lists of corresponding numbers

    color : matplotlib colorspec, optional
        Color to use in plot

    label : str, or None
        If not None, a label for plotting

    axes : :class:`matplotlib.axes.Axes`, dict, or `None`, optional
        If a :class:`matplotlib.axes.Axes`, an axes in which to place plot.
        This axes will be split into relevant panels.

        If a dict, this is expected to be a group of pre-split axes.

        If `None`, a new figure is generated, and axes are split.
        (Default: `None`)

    top_height : float, optional
        fraction of `axes` height to use in top panel containing
        marginal distribution (Default: 0.2)

    mask_invalid : bool, optional
        If `True` mask out any `nan`s or `inf`s, as these mess up kernel density
        estimates and histograms in matplotlib, even if in masked arrays

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger
        than `max_x` to `max_x` and so for `min_y` and `max_y`

    log : str, "", "x", "y", or "xy", optional
        Plot these axes on a log scale (Default: "" for no log axes).
        When `x` is on a log scale, only values > 0 enter the density estimate.

    scargs : Keyword arguments, optional
        Arguments to pass to :func:`~matplotlib.pyplot.scatter`
        (Default: :obj:`plastid_default_scatter`). We highly recommend
        setting `rasterized` to `True`!

    kdalpha : float, optional
        Alpha level (transparency) for marginal distributions (Default: 0.7)

    bw_method : str
        Bandwidth estimation method. See documentation for
        :obj:`scipy.stats.gaussian_kde`. (Default: `"scott"`)


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Figure

    dict
        Dictionary of axes. `'orig'` refers to `ax`. The central panel is `'main'`.
        Other panels will be mapped to `'top'`, `'left'` et c, if they are created.
    """
    fig, axes = _scatterhist_help(axes=axes, top_height=top_height)

    if color is None:
        color = next(get_color_cycle(axes["main"]))

    if mask_invalid:
        x, y = clean_invalid(x, y, min_x=min_x, max_x=max_x, min_y=min_y, max_y=max_y)

    # Lists are documented as valid input, but the masking below needs arrays.
    # asanyarray() leaves ndarray subclasses (e.g. masked arrays) untouched.
    x = numpy.asanyarray(x)
    y = numpy.asanyarray(y)

    if label is not None:
        # copy first: the default `scargs` is a module-level dict shared by callers
        scargs = copy.deepcopy(scargs)
        scargs["label"] = label

    xlog = "x" in log
    if xlog:
        axes["main"].semilogx()
        axes["top"].semilogx()
        # non-positive values cannot be log-transformed for the density estimate
        xmask = x > 0
    else:
        xmask = numpy.ones(x.shape, dtype=bool)

    if "y" in log:
        axes["main"].semilogy()

    axes["main"].scatter(x, y, edgecolor=color, **scargs)

    # arguments for kernel density plot
    kargs = {
        "color": color,
        "alpha": kdalpha,
        "bw_method": bw_method,
    }

    if xmask.any():
        kde_plot(x[xmask], log=xlog, axes=axes["top"], **kargs)

    return fig, axes

def scatterhist_y(
        x,
        y,
        color        = None,
        axes         = None,
        label        = None,
        right_width  = 0.2,
        mask_invalid = True,
        log          = "xy",
        min_x        = -numpy.inf,
        min_y        = -numpy.inf,
        max_x        = numpy.inf,
        max_y        = numpy.inf,
        scargs       = plastid_default_scatter,
        bw_method    = "scott",
        kdalpha      = 0.7
): # yapf: disable
    """Produce a scatter plot with a kernel density estimate of the marginal `y` distribution

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Pair arrays or lists of corresponding numbers

    color : matplotlib colorspec, optional
        Color to use in plot

    label : str, or None
        If not None, a label for plotting

    axes : :class:`matplotlib.axes.Axes`, dict, or `None`, optional
        If a :class:`matplotlib.axes.Axes`, an axes in which to place plot.
        This axes will be split into relevant panels.

        If a dict, this is expected to be a group of pre-split axes.

        If `None`, a new figure is generated, and axes are split.
        (Default: `None`)

    right_width : float, optional
        fraction of `axes` width to use in right panel containing
        marginal distribution (Default: 0.2)

    mask_invalid : bool, optional
        If `True` mask out any `nan`s or `inf`s, as these mess up kernel density
        estimates and histograms in matplotlib, even if in masked arrays

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger
        than `max_x` to `max_x` and so for `min_y` and `max_y`

    log : str, "", "x", "y", or "xy", optional
        Plot these axes on a log scale (Default: "xy"; use "" for no log axes).
        When `y` is on a log scale, only values > 0 enter the density estimate.

    scargs : Keyword arguments, optional
        Arguments to pass to :func:`~matplotlib.pyplot.scatter`
        (Default: :obj:`plastid_default_scatter`). We highly recommend
        setting `rasterized` to `True`!

    kdalpha : float, optional
        Alpha level (transparency) for marginal distributions (Default: 0.7)

    bw_method : str
        Bandwidth estimation method. See documentation for
        :obj:`scipy.stats.gaussian_kde`. (Default: "scott")

    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Figure

    dict
        Dictionary of axes. `'orig'` refers to `ax`. The central panel is `'main'`.
        Other panels will be mapped to `'top'`, `'left'` et c, if they are created.
    """
    fig, axes = _scatterhist_help(axes=axes, right_width=right_width)

    if color is None:
        color = next(get_color_cycle(axes["main"]))

    if label is not None:
        # copy first: the default `scargs` is a module-level dict shared by callers
        scargs = copy.deepcopy(scargs)
        scargs["label"] = label

    if mask_invalid:
        x, y = clean_invalid(x, y, min_x=min_x, max_x=max_x, min_y=min_y, max_y=max_y)

    # Lists are documented as valid input, but the masking below needs arrays.
    # asanyarray() leaves ndarray subclasses (e.g. masked arrays) untouched.
    x = numpy.asanyarray(x)
    y = numpy.asanyarray(y)

    if "x" in log:
        axes["main"].semilogx()

    ylog = "y" in log
    if ylog:
        axes["main"].semilogy()
        axes["right"].semilogy()
        # non-positive values cannot be log-transformed for the density estimate
        ymask = y > 0
    else:
        ymask = numpy.ones(y.shape, dtype=bool)

    axes["main"].scatter(x, y, edgecolor=color, **scargs)

    # kernel density
    kargs = {
        "color": color,
        "alpha": kdalpha,
        "bw_method": bw_method,
    }

    if ymask.any():
        kde_plot(y[ymask], log=ylog, axes=axes["right"], vert=True, **kargs)

    return fig, axes

def scatterhist_xy(
        x,
        y,
        color        = None,
        axes         = None,
        label        = None,
        top_height   = 0.2,
        right_width  = 0.2,
        mask_invalid = True,
        log          = "xy",
        min_x        = -numpy.inf,
        min_y        = -numpy.inf,
        max_x        = numpy.inf,
        max_y        = numpy.inf,
        scargs       = plastid_default_scatter,
        kdalpha      = 0.7,
        bw_method    = "scott"
): # yapf: disable
    """Produce a scatter plot with kernel density estimate of the marginal `x` and `y` distributions

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Pair arrays or lists of corresponding numbers

    color : matplotlib colorspec, optional
        Color to use in plot

    label : str, or None
        If not None, a label for plotting

    top_height : float, optional
        fraction of `axes` height to use in top panel containing
        marginal distribution (Default: 0.2)

    right_width : float, optional
        fraction of `axes` width to use in right panel containing
        marginal distribution (Default: 0.2)

    axes : :class:`matplotlib.axes.Axes`, dict, or `None`, optional
        If a :class:`matplotlib.axes.Axes`, an axes in which to place plot.
        This axes will be split into relevant panels.

        If a dict, this is expected to be a group of pre-split axes.

        If `None`, a new figure is generated, and axes are split.
        (Default: `None`)

    mask_invalid : bool, optional
        If `True` mask out any `nan`s or `inf`s, as these mess up kernel density
        estimates and histograms in matplotlib, even if in masked arrays

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger
        than `max_x` to `max_x` and so for `min_y` and `max_y`

    log : str, "", "x", "y", or "xy", optional
        Plot these axes on a log scale (Default: "xy"; use "" for no log axes).
        For an axis on a log scale, only values > 0 enter its density estimate.

    scargs : Keyword arguments, optional
        Arguments to pass to :func:`~matplotlib.pyplot.scatter`
        (Default: :obj:`plastid_default_scatter`). We highly recommend
        setting `rasterized` to `True`!

    kdalpha : float, optional
        Alpha level (transparency) for marginal distributions (Default: 0.7)

    bw_method : str
        Bandwidth estimation method. See documentation for
        :obj:`scipy.stats.gaussian_kde`. (Default: "scott")

    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Figure

    dict
        Dictionary of axes. `'orig'` refers to `ax`. The central panel is `'main'`.
        Other panels will be mapped to `'top'`, `'left'` et c, if they are created.
    """
    fig, axes = _scatterhist_help(axes=axes, top_height=top_height, right_width=right_width)

    if color is None:
        color = next(get_color_cycle(axes["main"]))

    if mask_invalid:
        x, y = clean_invalid(x, y, min_x=min_x, max_x=max_x, min_y=min_y, max_y=max_y)

    # Lists are documented as valid input, but the masking below needs arrays.
    # asanyarray() leaves ndarray subclasses (e.g. masked arrays) untouched.
    x = numpy.asanyarray(x)
    y = numpy.asanyarray(y)

    if label is not None:
        # copy first: the default `scargs` is a module-level dict shared by callers
        scargs = copy.deepcopy(scargs)
        scargs["label"] = label

    xlog = "x" in log
    if xlog:
        axes["main"].semilogx()
        axes["top"].semilogx()
        # non-positive values cannot be log-transformed for the density estimate
        xmask = x > 0
    else:
        xmask = numpy.ones(x.shape, dtype=bool)

    ylog = "y" in log
    if ylog:
        axes["main"].semilogy()
        axes["right"].semilogy()
        ymask = y > 0
    else:
        ymask = numpy.ones(y.shape, dtype=bool)

    axes["main"].scatter(x, y, edgecolor=color, **scargs)

    # kernel densities
    kargs = {
        "color": color,
        "alpha": kdalpha,
        "bw_method": bw_method,
    }

    if ymask.any():
        kde_plot(y[ymask], log=ylog, axes=axes["right"], vert=True, **kargs)

    if xmask.any():
        kde_plot(x[xmask], log=xlog, axes=axes["top"], **kargs)

    return fig, axes


#==============================================================================
# Plots specific for genomics
#==============================================================================

def ma_plot(
        x,
        y,
        axes         = None,
        color        = None,
        label        = None,
        xlabel       = None,
        ylabel       = None,
        title        = None,
        right_width  = 0.2,
        log          = "xy",
        min_x        = -numpy.inf,
        max_x        = numpy.inf,
        min_y        = -numpy.inf,
        max_y        = numpy.inf,
        scargs       = plastid_default_scatter,
        mask_invalid = True,
        kdalpha      = 0.7
): # yapf: disable
    r"""Plot fold changes (:math:`\log_{2} (y/x)`) as a function of the mean of x and y (:math:`0.5*(x+y)`).

    The ratio `y/x` is what is actually plotted on the y-axis; it appears as a
    fold change on a logarithmic scale when `log` contains `"y"` (the default).
    Points whose :math:`\log_{2} (y/x)` is not finite (e.g. `x` or `y` is zero,
    or the two have opposite signs) are dropped before plotting.

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Paired arrays or lists of corresponding numbers

    color : matplotlib colorspec, optional
        Color to use in plot

    label : str or None, optional
        If not `None`, a label for plotting

    xlabel : str or None, optional
        If not `None`, an x-axis label. Only applied when `axes` is `None`.

    ylabel : str or None, optional
        If not `None`, a y-axis label. Only applied when `axes` is `None`,
        in which case it defaults to `"log2 fold change"`.

    title : str or None, optional
        If not `None`, a title for the figure. Only applied when `axes`
        is `None`.

    right_width : float, optional
        fraction of `axes` width to use in right panel containing
        marginal distribution (Default: 0.2)

    axes : :class:`matplotlib.axes.Axes`, dict, or `None`, optional
        If a :class:`matplotlib.axes.Axes`, an axes in which to place plot.
        This axes will be split into relevant panels.

        If a dict, this is expected to be a group of pre-split axes.

        If `None`, a new figure is generated, and axes are split. Reference
        lines at a ratio of 1, axis labels, and the title are only added
        in this case. (Default: `None`)

    mask_invalid : bool, optional
        If `True` mask out any `nan`s or `inf`s, as these mess up kernel density
        estimates and histograms in matplotlib, even if in masked arrays

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger
        than `max_x` to `max_x` and so for `min_y` and `max_y`

    log : str, "", "x", "y", or "xy", optional
        Plot these axes on a log scale (Default: "xy")

    scargs : Keyword arguments, optional
        Arguments to pass to :func:`~matplotlib.pyplot.scatter`
        (Default: :obj:`plastid_default_scatter` ). Recommend: set `rasterized`
        to `True`

    kdalpha : float, optional
        Alpha level (transparency) for marginal distributions (Default: 0.7)


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Figure

    dict
        Dictionary of axes. `'orig'` refers to `axes`. The central panel is `'main'`.
        Other panels will be mapped to `'top'`, `'left'` et c, if they are created.
    """
    # Decorations (reference lines, labels, title) are only drawn when we own
    # the figure, so repeated calls onto existing axes don't duplicate them
    do_setup = axes is None

    # Accept plain lists as documented; asanyarray leaves ndarray subclasses
    # (e.g. masked arrays) untouched
    x = numpy.asanyarray(x)
    y = numpy.asanyarray(y)

    # Zero denominators and logs of non-positive ratios are expected here and
    # are filtered out just below, so keep numpy from warning about them
    with numpy.errstate(divide="ignore", invalid="ignore"):
        ratio = y / x
        logs = numpy.ma.masked_invalid(numpy.log2(ratio))

    # getmaskarray always yields a full boolean array, even if nothing is masked
    imask = ~numpy.ma.getmaskarray(logs)
    mean = 0.5 * (x + y)

    fig, axdict = scatterhist_y(
        mean[imask],
        ratio[imask],
        axes         = axes,
        min_x        = min_x,
        max_x        = max_x,
        min_y        = min_y,
        max_y        = max_y,
        log          = log,
        right_width  = right_width,
        color        = color,
        mask_invalid = mask_invalid,
        label        = label,
        scargs       = scargs,
        kdalpha      = kdalpha
    ) # yapf: disable

    if do_setup:
        # A ratio of 1 corresponds to no change between x and y
        axdict["main"].axhline(1, color=process_black, zorder=-5, linewidth=1)
        axdict["right"].axhline(1, color=process_black, zorder=-5, linewidth=1)
        axdict["right"].xaxis.set_ticklabels([])

        if ylabel is None:
            ylabel = "log2 fold change"

        axdict["main"].set_ylabel(ylabel)
        if xlabel is not None:
            axdict["main"].set_xlabel(xlabel)

        if title is not None:
            # Title the figure we return, rather than whichever figure
            # pyplot currently considers active
            fig.suptitle(title)

    return fig, axdict


def phase_plot(counts, labels=None, cmap=None, color=None, lighten_by=0.2, fig=None, line=None, bar=None):
    """Phasing plot for ribosome profiling

    Creates a two-panel plot:

      - the top panel is a line graph indicating the fraction of reads
        as a function of read length

      - the bottom panel is a stacked bar graph, showing the fraction
        of reads in each codon position for each read length, with
        codon position 2 stacked above position 1 stacked above position 0


    Parameters
    ----------
    counts : :class:`numpy.ndarray`
        Nx3 array of raw counts, where each row represents a read length,
        and each column a codon phase

    labels : list, optional
        Labels for each stack. If `None`, stacks are labeled sequentially by number.
        (Default: `None`)
    
    lighten_by : float, optional
        Amount by which to lighten sequential blocks in each stack. (Default: 0.2)
    
    cmap : :class:`matplotlib.colors.Colormap`, optional
        Colormap from which to generate bar colors. If supplied, will override
        `color`.

    color : matplotlib colorspec, or list of these
        Colors to use in plot. Overridden if `cmap` is supplied.

    fig : dict or None, optional
        Keyword arguments to pass to :func:`matplotlib.pyplot.subplots`
        when creating the figure. `None` is treated as no arguments.
        
    line : dict or None, optional
        Keyword arguments to pass to :func:`matplotlib.pyplot.plot`
        in top panel. If no `color` is given, :obj:`process_black` is used.
        `None` is treated as no arguments. The dict is not modified.

    bar : dict or None, optional
        Keyword arguments to pass to :func:`stacked_bar` for the bottom
        panel. `None` is treated as no arguments.


    Returns
    -------
    :class:`matplotlib.figure.Figure`
        Figure

    tuple
        Tuple of :class:`matplotlib.axes.Axes`; the first corresponding 
        to the line graph (top panel), the second, the bar graph (bottom).
    """
    # Work on private copies so neither the caller's dicts nor any shared
    # default is ever mutated
    fig_kwargs = {} if fig is None else dict(fig)
    line_kwargs = {} if line is None else dict(line)
    bar_kwargs = {} if bar is None else dict(bar)
    line_kwargs.setdefault("color", process_black)

    figure, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, **fig_kwargs)

    counts = numpy.asanyarray(counts)
    totals = counts.sum(1)

    # Read lengths with no reads would divide by zero; dividing by 1 instead
    # leaves their (all-zero) phase fractions at 0 rather than NaN
    row_divisors = numpy.where(totals == 0, 1, totals)
    phases = (counts.astype(float).T / row_divisors).T

    stacked_bar(
        phases,
        axes=ax2,
        labels=labels,
        lighten_by=lighten_by,
        cmap=cmap,
        color=color,
        **bar_kwargs
    )
    ax2.set_xlabel("Read length (nt)")
    ax2.set_ylabel("Fraction in each phase")

    # Offset by 0.5 so each point of the line sits half a unit past its row index
    x = numpy.arange(len(totals)) + 0.5

    grand_total = totals.sum()
    if grand_total == 0:
        # No reads at all: every length holds a zero fraction
        read_fractions = numpy.zeros(len(totals))
    else:
        read_fractions = totals.astype(float) / grand_total

    ax1.plot(x, read_fractions, **line_kwargs)
    ax1.set_ylabel("Fraction of reads")

    return figure, (ax1, ax2)