Source code for rootpy.plotting.contrib.plot_contour_matrix

from __future__ import absolute_import

import os
import tempfile
import shutil

from . import log; log = log[__name__]
from .. import Hist2D
from .gif import GIF

# Names exported by a star-import of this module.
__all__ = ['plot_contour_matrix']

# Fallback matplotlib line styles, handed to the contour call whenever the
# caller does not supply ``sample_lines``.
LINES = [
    'dashed',
    'solid',
    'dashdot',
    'dotted',
]


def plot_contour_matrix(arrays,
                        fields,
                        filename,
                        weights=None,
                        sample_names=None,
                        sample_lines=None,
                        sample_colors=None,
                        color_map=None,
                        num_bins=20,
                        num_contours=3,
                        cell_width=2,
                        cell_height=2,
                        cell_margin_x=0.05,
                        cell_margin_y=0.05,
                        dpi=100,
                        padding=0,
                        animate_field=None,
                        animate_steps=10,
                        animate_delay=20,
                        animate_loop=0):
    """
    Create a matrix of contour plots showing all possible 2D projections of a
    multivariate dataset. You may optionally animate the contours as a cut on
    one of the fields is increased. ImageMagick must be installed to produce
    animations.

    Parameters
    ----------

    arrays : list of arrays of shape [n_samples, n_fields]
        A list of 2D NumPy arrays for each sample. All arrays must have the
        same number of columns.

    fields : list of strings
        A list of the field names.

    filename : string
        The output filename. If animation is enabled
        (``animate_field is not None``) then ``filename`` must have the .gif
        extension.

    weights : list of arrays, optional (default=None)
        List of 1D NumPy arrays of sample weights corresponding to the arrays
        in ``arrays``. Must contain one entry per array. When animating, each
        weight array is cut with the same row selection as its sample.

    sample_names : list of strings, optional (default=None)
        A list of the sample names for the legend. If None, then no legend
        will be shown.

    sample_lines : list of strings, optional (default=None)
        A list of matplotlib line styles for each sample. Elements of this
        list may also be a list of line styles for the contour lines of the
        corresponding sample. If None (or empty), the default styles
        'dashed', 'solid', 'dashdot' and 'dotted' are passed to the contour
        call of every sample.

    sample_colors : list of matplotlib colors, optional (default=None)
        The color of the contours for each sample. If None, then colors will
        be selected according to regular intervals along the ``color_map``.

    color_map : a matplotlib color map, optional (default=None)
        If ``sample_colors is None`` then select colors according to regular
        intervals along this matplotlib color map. If ``color_map`` is None,
        then the spectral color map is used.

    num_bins : int, optional (default=20)
        The number of bins along both axes of the 2D histograms.

    num_contours : int, optional (default=3)
        The number of contour lines to show for each sample.

    cell_width : float, optional (default=2)
        The width, in inches, of each subplot in the matrix.

    cell_height : float, optional (default=2)
        The height, in inches, of each subplot in the matrix.

    cell_margin_x : float, optional (default=0.05)
        The horizontal margin between adjacent subplots, as a fraction
        of the subplot size.

    cell_margin_y : float, optional (default=0.05)
        The vertical margin between adjacent subplots, as a fraction
        of the subplot size.

    dpi : int, optional (default=100)
        The number of pixels per inch.

    padding : float, optional (default=0)
        The padding, as a fraction of the range of the value along each axis,
        to guarantee around each sample's contour plot.

    animate_field : string, optional (default=None)
        The field to animate a cut along. By default no animation is produced.
        If ``animate_field is not None`` then ``filename`` must end in the
        .gif extension and an animated GIF is produced.

    animate_steps : int, optional (default=10)
        The number of frames in the animation, corresponding to the number of
        regularly spaced cut values to show along the range of the
        ``animate_field``.

    animate_delay : int, optional (default=20)
        The duration that each frame is shown in the animation as a multiple
        of 1 / 100 of a second.

    animate_loop : int, optional (default=0)
        The number of times to loop the animation. If zero, then loop forever.

    Raises
    ------

    ValueError
        If fewer than two fields are given, if an array's column count does
        not match the number of fields, if ``weights`` does not have one
        entry per array, if animation is requested for a filename that does
        not end in .gif, or if ``animate_field`` is not one of ``fields``.

    Notes
    -----

    NumPy and matplotlib are required

    """
    # Validate the arguments up front, before the heavy plotting imports, so
    # that bad input fails fast and without side effects.

    # we must have at least two fields (columns)
    num_fields = len(fields)
    if num_fields < 2:
        raise ValueError(
            "record arrays must have at least two fields")

    # check that all arrays have the same number of columns
    for array in arrays:
        if array.shape[1] != num_fields:
            raise ValueError(
                "number of array columns does not match number of fields")

    if weights is not None and len(weights) != len(arrays):
        raise ValueError(
            "number of weight arrays does not match number of arrays")

    field_idx = None
    if animate_field is not None:
        _, ext = os.path.splitext(filename)
        if ext != '.gif':
            raise ValueError(
                "animation is only supported for .gif files")
        # raises ValueError if animate_field is not one of the fields
        field_idx = fields.index(animate_field)

    import numpy as np
    from .. import root2matplotlib as r2m
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator
    from matplotlib import cm
    from matplotlib.lines import Line2D

    if sample_colors is None:
        if color_map is None:
            # The ``spectral`` map is named ``nipy_spectral`` in newer
            # matplotlib releases; fall back to the old name otherwise.
            color_map = getattr(cm, 'nipy_spectral', None)
            if color_map is None:
                color_map = cm.spectral
        steps = np.linspace(0, 1, len(arrays) + 2)[1:-1]
        sample_colors = [color_map(s) for s in steps]

    # determine range of each field; work in floating point so that the
    # in-place padding below also works for integer-typed input arrays
    low = np.vstack([a.min(axis=0) for a in arrays]).min(axis=0).astype(float)
    high = np.vstack([a.max(axis=0) for a in arrays]).max(axis=0).astype(float)
    margin = np.abs(high - low) * padding
    low -= margin
    high += margin

    def single_frame(frame_arrays, frame_weights, output, label=None):
        # Draw one full matrix of contour plots and save it to ``output``.
        # ``frame_weights`` is None or holds one entry per frame array.

        # create the canvas and divide into matrix
        fig, axes = plt.subplots(
            nrows=num_fields, ncols=num_fields,
            figsize=(cell_width * num_fields, cell_height * num_fields))
        try:
            fig.subplots_adjust(hspace=cell_margin_y, wspace=cell_margin_x)

            for ax in axes.flat:
                # Newer matplotlib exposes the grid-position helpers on the
                # SubplotSpec instead of on the Axes itself.
                if hasattr(ax, 'is_last_row'):
                    cell = ax
                else:
                    cell = ax.get_subplotspec()

                # only show the left and bottom axes ticks and labels
                if cell.is_last_row() and not cell.is_last_col():
                    ax.xaxis.set_visible(True)
                    ax.xaxis.set_ticks_position('bottom')
                    ax.xaxis.set_major_locator(MaxNLocator(4, prune='both'))
                    for tick in ax.xaxis.get_major_ticks():
                        # ``label1`` is the primary tick label (the legacy
                        # ``label`` alias is gone from newer matplotlib)
                        tick.label1.set_rotation('vertical')
                else:
                    ax.xaxis.set_visible(False)

                if cell.is_first_col() and not cell.is_first_row():
                    ax.yaxis.set_visible(True)
                    ax.yaxis.set_ticks_position('left')
                    ax.yaxis.set_major_locator(MaxNLocator(4, prune='both'))
                else:
                    ax.yaxis.set_visible(False)

            # turn off axes frames in upper triangular matrix
            for ix, iy in zip(*np.triu_indices_from(axes, k=0)):
                axes[ix, iy].axis('off')

            levels = np.linspace(0, 1, num_contours + 2)[1:-1]

            # plot the data
            for iy, ix in zip(*np.tril_indices_from(axes, k=-1)):
                ymin = float(low[iy])
                ymax = float(high[iy])
                xmin = float(low[ix])
                xmax = float(high[ix])
                for isample, a in enumerate(frame_arrays):
                    hist = Hist2D(
                        num_bins, xmin, xmax,
                        num_bins, ymin, ymax)
                    if frame_weights is not None:
                        hist.fill_array(a[:, [ix, iy]],
                                        frame_weights[isample])
                    else:
                        hist.fill_array(a[:, [ix, iy]])
                    # normalize so maximum is 1.0
                    _max = hist.GetMaximum()
                    if _max != 0:
                        hist /= _max
                    r2m.contour(
                        hist,
                        axes=axes[iy, ix],
                        levels=levels,
                        linestyles=(sample_lines[isample]
                                    if sample_lines else LINES),
                        colors=sample_colors[isample])

            # label the diagonal subplots
            for i, field in enumerate(fields):
                axes[i, i].annotate(
                    field,
                    (0.1, 0.2),
                    rotation=45,
                    xycoords='axes fraction',
                    ha='left', va='center')

            if sample_names is not None:
                # make proxy artists for legend
                lines = [Line2D([0, 0], [0, 0], color=color)
                         for color in sample_colors]
                # draw the legend
                leg = fig.legend(lines, sample_names, loc=(0.65, 0.8))
                leg.set_frame_on(False)

            if label is not None:
                axes[0, 0].annotate(
                    label, (0, 1),
                    ha='left', va='top',
                    xycoords='axes fraction')

            fig.savefig(output, bbox_inches='tight', dpi=dpi)
        finally:
            # always release the figure, even if plotting or saving failed
            plt.close(fig)

    if animate_field is None:
        single_frame(arrays, weights, filename)
        return

    # regularly spaced cut values; the upper edge of the range is excluded
    cuts = np.linspace(
        low[field_idx], high[field_idx],
        animate_steps + 1)[:-1]
    gif = GIF()
    temp_dir = tempfile.mkdtemp()
    try:
        for i, cut in enumerate(cuts):
            frame_filename = os.path.join(
                temp_dir, 'frame_{0:d}.png'.format(i))
            label = '{0} > {1:.2f}'.format(animate_field, cut)
            log.info("creating frame for {0} ...".format(label))
            masks = [array[:, field_idx] > cut for array in arrays]
            cut_arrays = [array[mask] for array, mask in zip(arrays, masks)]
            if weights is None:
                cut_weights = None
            else:
                # apply the same row selection to the weights so they stay
                # aligned with the rows that survive the cut
                cut_weights = [
                    w if w is None else np.asarray(w)[mask]
                    for w, mask in zip(weights, masks)]
            single_frame(cut_arrays, cut_weights, frame_filename,
                         label=label)
            gif.add_frame(frame_filename)
        gif.write(filename, delay=animate_delay, loop=animate_loop)
    finally:
        # remove the frame directory even when a frame or the GIF failed
        shutil.rmtree(temp_dir)