# Source code for skopt.plots

"""Plotting functions."""
import sys
import numpy as np
from itertools import count
from functools import partial
from scipy.optimize import OptimizeResult

from skopt import expected_minimum, expected_minimum_random_sampling
from .space import Categorical

# For plot tests, matplotlib must be set to headless mode early
if 'pytest' in sys.modules:
    import matplotlib

    matplotlib.use('Agg')

import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from matplotlib.ticker import LogLocator
from matplotlib.ticker import MaxNLocator, FuncFormatter  # noqa: E402


def plot_convergence(*args, **kwargs):
    """Plot one or several convergence traces.

    Parameters
    ----------
    args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple
        The result(s) for which to plot the convergence trace.

        - if `OptimizeResult`, then draw the corresponding single trace;
        - if list of `OptimizeResult`, then draw the corresponding convergence
          traces in transparency, along with the average convergence trace.
          The number of calls is taken from the first result in the list;
        - if tuple, then `args[i][0]` should be a string label and `args[i][1]`
          an `OptimizeResult` or a list of `OptimizeResult`.

        Entries of any other type are skipped.

    ax : `Axes`, optional
        The matplotlib axes on which to draw the plot. When omitted or
        `None`, the current axes (`plt.gca()`) are used.

    true_minimum : float, optional
        The true minimum value of the function, if known. Drawn as a dashed
        horizontal line. Any value other than `None` is drawn, including 0.

    yscale : None or string, optional
        The scale for the y-axis.

    Returns
    -------
    ax : `Axes`
        The matplotlib axes.
    """
    # Options are read from **kwargs because every positional argument is a
    # result to plot. Unrecognised keywords are ignored.
    ax = kwargs.get("ax", None)
    true_minimum = kwargs.get("true_minimum", None)
    yscale = kwargs.get("yscale", None)

    if ax is None:
        ax = plt.gca()

    ax.set_title("Convergence plot")
    ax.set_xlabel("Number of calls $n$")
    ax.set_ylabel(r"$\min f(x)$ after $n$ calls")
    ax.grid()

    if yscale is not None:
        ax.set_yscale(yscale)

    colors = cm.viridis(np.linspace(0.25, 1.0, len(args)))

    # Remember whether *any* plotted trace carries a label, so the legend does
    # not depend on the last positional argument only.
    has_label = False

    for results, color in zip(args, colors):
        if isinstance(results, tuple):
            name, results = results
        else:
            name = None

        if isinstance(results, OptimizeResult):
            n_calls = len(results.x_iters)
            mins = [np.min(results.func_vals[:i])
                    for i in range(1, n_calls + 1)]
            ax.plot(range(1, n_calls + 1), mins, c=color,
                    marker=".", markersize=12, lw=2, label=name)
        elif isinstance(results, list):
            n_calls = len(results[0].x_iters)
            iterations = range(1, n_calls + 1)
            mins = [[np.min(r.func_vals[:i]) for i in iterations]
                    for r in results]

            # Individual runs are drawn faintly, their mean prominently.
            for m in mins:
                ax.plot(iterations, m, c=color, alpha=0.2)

            ax.plot(iterations, np.mean(mins, axis=0), c=color,
                    marker=".", markersize=12, lw=2, label=name)
        else:
            continue

        has_label = has_label or bool(name)

    # Compare against None explicitly: a true minimum of 0.0 is a legitimate
    # value and must still be drawn.
    if true_minimum is not None:
        ax.axhline(true_minimum, linestyle="--",
                   color="r", lw=1,
                   label="True minimum")

    if true_minimum is not None or has_label:
        ax.legend(loc="best")

    return ax
def plot_regret(*args, **kwargs):
    """Plot one or several cumulative regret traces.

    Parameters
    ----------
    args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple
        The result(s) for which to plot the cumulative regret trace.

        - if `OptimizeResult`, then draw the corresponding single trace;
        - if list of `OptimizeResult`, then draw the corresponding cumulative
          regret traces in transparency, along with the average cumulative
          regret trace. The number of calls is taken from the first result
          in the list;
        - if tuple, then `args[i][0]` should be a string label and `args[i][1]`
          an `OptimizeResult` or a list of `OptimizeResult`.

        Entries of any other type are skipped.

    ax : `Axes`, optional
        The matplotlib axes on which to draw the plot. When omitted or
        `None`, the current axes (`plt.gca()`) are used.

    true_minimum : float, optional
        The true minimum value of the function, if known. When omitted or
        `None`, the smallest function value observed across all given
        results is used instead.

    yscale : None or string, optional
        The scale for the y-axis.

    Returns
    -------
    ax : `Axes`
        The matplotlib axes.
    """
    # Options are read from **kwargs because every positional argument is a
    # result to plot. Unrecognised keywords are ignored.
    ax = kwargs.get("ax", None)
    true_minimum = kwargs.get("true_minimum", None)
    yscale = kwargs.get("yscale", None)

    if ax is None:
        ax = plt.gca()

    ax.set_title("Cumulative regret plot")
    ax.set_xlabel("Number of calls $n$")
    ax.set_ylabel(r"$\sum_{i=0}^n(f(x_i) - optimum)$ after $n$ calls")
    ax.grid()

    if yscale is not None:
        ax.set_yscale(yscale)

    colors = cm.viridis(np.linspace(0.25, 1.0, len(args)))

    if true_minimum is None:
        # No reference optimum given: fall back to the best value seen in any
        # of the supplied runs.
        observed = []
        for entry in args:
            if isinstance(entry, tuple):
                entry = entry[1]

            if isinstance(entry, OptimizeResult):
                observed.append(entry)
            elif isinstance(entry, list):
                observed.extend(entry)

        if observed:
            true_minimum = np.min([np.min(r.func_vals) for r in observed])

    # Remember whether *any* plotted trace carries a label, so the legend does
    # not depend on the last positional argument only.
    has_label = False

    for results, color in zip(args, colors):
        if isinstance(results, tuple):
            name, results = results
        else:
            name = None

        if isinstance(results, OptimizeResult):
            n_calls = len(results.x_iters)
            regrets = [np.sum(results.func_vals[:i] - true_minimum)
                       for i in range(1, n_calls + 1)]
            ax.plot(range(1, n_calls + 1), regrets, c=color,
                    marker=".", markersize=12, lw=2, label=name)
        elif isinstance(results, list):
            n_calls = len(results[0].x_iters)
            iterations = range(1, n_calls + 1)
            regrets = [[np.sum(r.func_vals[:i] - true_minimum)
                        for i in iterations]
                       for r in results]

            # Individual runs are drawn faintly, their mean prominently.
            for cr in regrets:
                ax.plot(iterations, cr, c=color, alpha=0.2)

            ax.plot(iterations, np.mean(regrets, axis=0), c=color,
                    marker=".", markersize=12, lw=2, label=name)
        else:
            continue

        has_label = has_label or bool(name)

    if has_label:
        ax.legend(loc="best")

    return ax
def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None):
    """Apply common limits, labels, ticks and scales to a scatter-plot matrix.

    The upper triangle is switched off, the lower triangle gets the bounds of
    its row/column dimensions, and all diagonal plots share one y-range.

    Parameters
    ----------
    ax : 2D array of `Axes`, indexed as `ax[row, column]`
        The `space.n_dims` x `space.n_dims` grid of axes to format in place.

    space : `Space`
        The search space; its `dimensions` supply bounds, names, priors and
        (for categorical dimensions) category names.

    ylabel : str
        Label for the y-axis of the diagonal plots.

    dim_labels : list of str, default=None
        Labels of the dimension variables. `None` defaults to each
        dimension's `name`, or `$X_{i}$` where the name is `None`.

    Returns
    -------
    ax : 2D array of `Axes`
        The same object that was passed in.
    """
    dims = space.dimensions
    n_dims = space.n_dims

    # Work out min, max of y axis for the diagonal so we can adjust
    # them all to the same value
    diagonal_ylim = (np.min([ax[i, i].get_ylim()[0] for i in range(n_dims)]),
                     np.max([ax[i, i].get_ylim()[1] for i in range(n_dims)]))

    if dim_labels is None:
        dim_labels = ["$X_{%i}$" % i if d.name is None else d.name
                      for i, d in enumerate(dims)]

    # Axes for categorical dimensions are really integers; we have to
    # label them with the category names
    iscat = [isinstance(dim, Categorical) for dim in dims]

    def category_formatter(dim):
        # partial() binds `dim` immediately, avoiding late-binding closures
        # created inside the loops below.
        return FuncFormatter(partial(_cat_format, dim))

    def integer_aware_locator(is_categorical):
        return MaxNLocator(6, prune='both', integer=is_categorical)

    # Deal with formatting of the axes
    for i in range(n_dims):  # rows
        for j in range(n_dims):  # columns
            ax_ = ax[i, j]

            if j > i:
                # Upper triangle is unused.
                ax_.axis("off")

            elif i > j:  # off-diagonal plots
                # plots on the diagonal are special, like Texas. They have
                # their own range so do not mess with them.
                if not iscat[i]:  # bounds not meaningful for categoricals
                    ax_.set_ylim(*dims[i].bounds)
                if iscat[j]:
                    ax_.xaxis.set_major_formatter(category_formatter(dims[j]))
                else:
                    ax_.set_xlim(*dims[j].bounds)

                if j == 0:  # only leftmost column (0) gets y labels
                    ax_.set_ylabel(dim_labels[i])
                    if iscat[i]:  # Set category labels for left column
                        ax_.yaxis.set_major_formatter(
                            category_formatter(dims[i]))
                else:
                    ax_.set_yticklabels([])

                if i < n_dims - 1:
                    # every row except the bottom one hides its x tick labels
                    ax_.set_xticklabels([])
                else:
                    # the bottom row shows rotated x tick labels and the name
                    for tick_label in ax_.get_xticklabels():
                        tick_label.set_rotation(45)
                    ax_.set_xlabel(dim_labels[j])

                # configure plot for linear vs log-scale
                if dims[j].prior == 'log-uniform':
                    ax_.set_xscale('log')
                else:
                    ax_.xaxis.set_major_locator(
                        integer_aware_locator(iscat[j]))

                if dims[i].prior == 'log-uniform':
                    ax_.set_yscale('log')
                else:
                    ax_.yaxis.set_major_locator(
                        integer_aware_locator(iscat[i]))

            else:  # diagonal plots
                ax_.set_ylim(*diagonal_ylim)
                ax_.yaxis.tick_right()
                ax_.yaxis.set_label_position('right')
                ax_.yaxis.set_ticks_position('both')
                ax_.set_ylabel(ylabel)

                ax_.xaxis.tick_top()
                ax_.xaxis.set_label_position('top')
                ax_.set_xlabel(dim_labels[j])

                if dims[i].prior == 'log-uniform':
                    ax_.set_xscale('log')
                else:
                    ax_.xaxis.set_major_locator(
                        integer_aware_locator(iscat[i]))
                    if iscat[i]:
                        ax_.xaxis.set_major_formatter(
                            category_formatter(dims[i]))

    return ax
def partial_dependence(space, model, i, j=None, sample_points=None,
                       n_samples=250, n_points=40, x_eval=None):
    """Calculate the partial dependence for dimensions `i` and `j` with
    respect to the objective value, as approximated by `model`.

    The partial dependence plot shows how the value of the dimensions
    `i` and `j` influence the `model` predictions after "averaging out"
    the influence of all other dimensions.

    When `x_eval` is not `None`, the given values are used instead of
    random samples. In this case, `sample_points` and `n_samples` are
    ignored.

    Parameters
    ----------
    space : `Space`
        The parameter space over which the minimization was performed.

    model
        Surrogate model for the objective function. Only its `predict`
        method is used.

    i : int
        The first dimension for which to calculate the partial dependence.

    j : int, default=None
        The second dimension for which to calculate the partial dependence.
        To calculate the 1D partial dependence on `i` alone set `j=None`.

    sample_points : np.array, shape=(n_points, n_dims), default=None
        Only used when `x_eval=None`, i.e. in case partial dependence should
        be calculated.
        Already transformed points to use when averaging the model function
        at each of the `n_points`.

    n_samples : int, default=250
        Number of random samples to use for averaging the model function
        at each of the `n_points` when using partial dependence. Only used
        when `sample_points=None` and `x_eval=None`.

    n_points : int, default=40
        Number of points at which to evaluate the partial dependence
        along each dimension `i` and `j`.

    x_eval : list, default=None
        `x_eval` is a list of parameter values or None. In case `x_eval`
        is not None, the dependence will be calculated using these values
        for all dimensions other than `i` (and `j`).
        Otherwise, random selected samples will be used.

    Returns
    -------
    For 1D partial dependence:

    xi : np.array
        The points at which the partial dependence was evaluated.

    yi : list of float
        The value of the model at each point `xi`.

    For 2D partial dependence:

    xi : np.array
        The points along dimension `j` at which the partial dependence was
        evaluated.

    yi : np.array
        The points along dimension `i` at which the partial dependence was
        evaluated.

    zi : np.array, shape=(len(yi), len(xi))
        The value of the model at each point `(xi, yi)`.

    For Categorical variables, the `xi` (and `yi` for 2D) returned are
    the indices of the variable in `Dimension.categories`.
    """
    # The idea is to step through one dimension, evaluating the model with
    # that dimension fixed and averaging either over random values or over
    # the given ones in x_eval in all other dimensions.
    # (Or step through 2 dimensions when i and j are given.)
    # Categorical dimensions make this interesting, because they are one-
    # hot-encoded, so there is a one-to-many mapping of input dimensions
    # to transformed (model) dimensions.

    # An explicit x_eval always wins, as documented; previously it was
    # silently ignored whenever sample_points was also supplied.
    if x_eval is not None:
        sample_points = space.transform([x_eval])
    elif sample_points is None:
        sample_points = space.transform(space.rvs(n_samples=n_samples))

    # dim_locs[i] is the (column index of the) start of dim i in
    # sample_points.
    # This is useful when we are using one hot encoding, i.e. using
    # categorical values
    dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])

    # One working copy is enough: each evaluation overwrites the same
    # columns, all other columns stay untouched, and the caller's array is
    # never modified.
    rvs_ = np.array(sample_points)

    if j is None:
        # We sample evenly instead of randomly. This is necessary when using
        # categorical values
        xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points)
        yi = []
        for x_ in xi_transformed:
            # We replace the values in the dimension that we want to keep
            # fixed
            rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x_
            # With random samples, the mean over all rows is the partial
            # dependence; with x_eval there is a single row.
            yi.append(np.mean(model.predict(rvs_)))

        return xi, yi

    xi, xi_transformed = _evenly_sample(space.dimensions[j], n_points)
    yi, yi_transformed = _evenly_sample(space.dimensions[i], n_points)

    zi = []
    for x_ in xi_transformed:
        row = []
        for y_ in yi_transformed:
            rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x_
            rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y_
            row.append(np.mean(model.predict(rvs_)))
        zi.append(row)

    # Rows were collected per x value; transpose so zi is indexed [y, x].
    return xi, yi, np.array(zi).T
def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
                   zscale='linear', dimensions=None, sample_source='random',
                   minimum='result', n_minimum_search=None):
    """Pairwise dependence plot of the objective function.

    The diagonal shows the partial dependence for dimension `i` with
    respect to the objective function. The off-diagonal shows the
    partial dependence for dimensions `i` and `j` with
    respect to the objective function. The objective function is
    approximated by `result.models[-1]`.

    Pairwise scatter plots of the points at which the objective
    function was directly evaluated are shown on the off-diagonal.
    A red point indicates per default the best observed minimum, but
    this can be changed by changing argument `minimum`.

    Parameters
    ----------
    result : `OptimizeResult`
        The result for which to create the scatter plot matrix.

    levels : int, default=10
        Number of levels to draw on the contour plot, passed directly
        to `contourf()`.

    n_points : int, default=40
        Number of points at which to evaluate the partial dependence
        along each dimension.

    n_samples : int, default=250
        Number of samples to use for averaging the model function
        at each of the `n_points` when `sample_source` is set to 'random'.

    size : float, default=2
        Height (in inches) of each facet.

    zscale : str, default='linear'
        Scale to use for the z axis of the contour plots. Either 'linear'
        or 'log'.

    dimensions : list of str, default=None
        Labels of the dimension variables. `None` defaults to
        `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`.

    sample_source : str or list of floats, default='random'
        Defines the samples to use for averaging the model function
        at each of the `n_points`.

        A partial dependence plot is only generated when `sample_source`
        is set to 'random' and `n_samples` is sufficient.

        `sample_source` can also be a list of floats, which is then used
        as the single point at which all other dimensions are held.

        Valid strings:

            - 'random' - `n_samples` random samples will be used
            - 'result' - Use only the best observed parameters
            - 'expected_minimum' - Parameters that give the best
                  minimum. Calculated using scipy's minimize method.
                  This method currently does not work with categorical
                  values.
            - 'expected_minimum_random' - Parameters that give the
                  best minimum when using naive random sampling.
                  Works with categorical values.

    minimum : str or list of floats, default='result'
        Defines the values for the red points in the plots.
        Valid strings:

            - 'result' - Use best observed parameters
            - 'expected_minimum' - Parameters that give the best
                  minimum. Calculated using scipy's minimize method.
                  This method currently does not work with categorical
                  values.
            - 'expected_minimum_random' - Parameters that give the
                  best minimum when using naive random sampling.
                  Works with categorical values.

    n_minimum_search : int, default=None
        Determines how many points should be evaluated
        to find the minimum when using 'expected_minimum' or
        'expected_minimum_random'. Parameter is used when
        `sample_source` and/or `minimum` is set to
        'expected_minimum' or 'expected_minimum_random'.

    Returns
    -------
    ax : 2D array of `Axes`, shape (n_dims, n_dims)
        The matplotlib axes.

    Raises
    ------
    ValueError
        If the space has only one dimension, or `zscale` is neither
        'linear' nor 'log'.
    """
    space = result.space
    if space.n_dims == 1:
        raise ValueError("plot_objective needs at least two "
                         "variables. Found only one.")

    # Validate the cheap option before any minimum search is started.
    if zscale == 'log':
        locator = LogLocator()
    elif zscale == 'linear':
        locator = None
    else:
        raise ValueError("Valid values for zscale are 'linear' and 'log',"
                         " not '%s'." % zscale)

    # Here we define the values for which to plot the red dot (2d plot) and
    # the red dotted line (1d plot).
    x_vals = _evaluate_min_params(result, minimum, n_minimum_search)

    # Points (in transformed space) over which the model is averaged.
    # 'random' gives a true partial dependence; any other source pins all
    # remaining dimensions to one single point. Passing these as
    # `sample_points` makes the chosen source take effect; previously random
    # samples were always passed and `sample_source` was effectively ignored.
    if sample_source == "random":
        sample_points = space.transform(space.rvs(n_samples=n_samples))
    else:
        x_eval = _evaluate_min_params(result, sample_source,
                                      n_minimum_search)
        sample_points = space.transform([x_eval])

    samples, minimum_mapped, _ = _map_categories(space, result.x_iters,
                                                 x_vals)
    model = result.models[-1]

    fig, ax = plt.subplots(space.n_dims, space.n_dims,
                           figsize=(size * space.n_dims, size * space.n_dims))

    fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
                        hspace=0.1, wspace=0.1)

    for i in range(space.n_dims):
        for j in range(space.n_dims):
            if i == j:
                xi, yi = partial_dependence(space, model, i,
                                            j=None,
                                            sample_points=sample_points,
                                            n_points=n_points)

                ax[i, i].plot(xi, yi)
                ax[i, i].axvline(minimum_mapped[i], linestyle="--",
                                 color="r", lw=1)

            # lower triangle
            elif i > j:
                # Keyword arguments on purpose: passed positionally,
                # `n_points` would land in the `n_samples` slot.
                xi, yi, zi = partial_dependence(space, model, i, j=j,
                                                sample_points=sample_points,
                                                n_points=n_points)
                ax[i, j].contourf(xi, yi, zi, levels,
                                  locator=locator, cmap='viridis_r')
                ax[i, j].scatter(samples[:, j], samples[:, i],
                                 c='k', s=10, lw=0.)
                ax[i, j].scatter(minimum_mapped[j], minimum_mapped[i],
                                 c=['r'], s=20, lw=0.)

    ylabel = "Partial dependence"
    return _format_scatter_plot_axes(ax, space, ylabel=ylabel,
                                     dim_labels=dimensions)
def plot_evaluations(result, bins=20, dimensions=None):
    """Visualize the order in which points were sampled.

    The scatter plot matrix shows at which points in the search
    space and in which order samples were evaluated. Pairwise
    scatter plots are shown on the off-diagonal for each
    dimension of the search space. The order in which samples
    were evaluated is encoded in each point's color.
    The diagonal shows a histogram of sampled values for each
    dimension. A red point indicates the found minimum.

    Parameters
    ----------
    result : `OptimizeResult`
        The result for which to create the scatter plot matrix.

    bins : int, default=20
        Number of bins to use for histograms on the diagonal. Ignored for
        categorical dimensions, which get one bin per category.

    dimensions : list of str, default=None
        Labels of the dimension variables. `None` defaults to
        `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`.

    Returns
    -------
    ax : 2D array of `Axes`, shape (n_dims, n_dims)
        The matplotlib axes.
    """
    space = result.space
    n_dims = space.n_dims

    # Convert categoricals to integers, so we can ensure consistent ordering.
    # Assign indices to categories in the order they appear in the Dimension.
    # Matplotlib's categorical plotting functions are only present in v 2.1+,
    # and may order categoricals differently in different plots anyway.
    samples, minimum, iscat = _map_categories(space, result.x_iters, result.x)
    order = range(samples.shape[0])

    # squeeze=False keeps `ax` a 2D array even for a one-dimensional space,
    # where subplots() would otherwise return a bare Axes and the `ax[i, j]`
    # indexing below would fail.
    fig, ax = plt.subplots(n_dims, n_dims,
                           figsize=(2 * n_dims, 2 * n_dims),
                           squeeze=False)

    fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
                        hspace=0.1, wspace=0.1)

    for i in range(n_dims):
        for j in range(n_dims):
            if i == j:
                dim = space.dimensions[j]
                if iscat[j]:
                    bins_ = len(dim.categories)
                elif dim.prior == 'log-uniform':
                    # Bin edges equally spaced in log space.
                    low, high = dim.bounds
                    bins_ = np.logspace(np.log10(low), np.log10(high), bins)
                else:
                    bins_ = bins
                ax[i, i].hist(
                    samples[:, j], bins=bins_,
                    range=None if iscat[j] else dim.bounds)

            # lower triangle
            elif i > j:
                ax[i, j].scatter(samples[:, j], samples[:, i],
                                 c=order, s=40, lw=0., cmap='viridis')
                ax[i, j].scatter(minimum[j], minimum[i],
                                 c=['r'], s=20, lw=0.)

    return _format_scatter_plot_axes(ax, space, ylabel="Number of samples",
                                     dim_labels=dimensions)
def _map_categories(space, points, minimum):
    """Map categorical values to integers in a set of points.

    Parameters
    ----------
    space : `Space`
        The search space whose `dimensions` decide which columns are
        categorical.

    points : array-like, shape (n_points, space.n_dims)
        The points to convert.

    minimum : sequence, length space.n_dims
        A single point to convert in the same way.

    Returns
    -------
    mapped_points : np.array, shape=points.shape
        A copy of `points` with categoricals replaced with their indices in
        the corresponding `Dimension`.

    mapped_minimum : np.array, shape (space.n_dims,)
        A copy of `minimum` with categoricals replaced with their indices in
        the corresponding `Dimension`.

    iscat : np.array, shape (space.n_dims,)
        Boolean array indicating whether dimension `i` in the `space` is
        categorical.
    """
    points = np.asarray(points, dtype=object)  # Allow slicing, preserve cats
    iscat = np.repeat(False, space.n_dims)
    min_ = np.zeros(space.n_dims)
    pts_ = np.zeros(points.shape)
    for i, dim in enumerate(space.dimensions):
        if isinstance(dim, Categorical):
            iscat[i] = True
            # Index of each category, in the order the Dimension lists them.
            catmap = {cat: index for index, cat in enumerate(dim.categories)}
            pts_[:, i] = [catmap[cat] for cat in points[:, i]]
            min_[i] = catmap[minimum[i]]
        else:
            pts_[:, i] = points[:, i]
            min_[i] = minimum[i]
    return pts_, min_, iscat


def _evenly_sample(dim, n_points):
    """Return `n_points` evenly spaced points from a Dimension.

    Parameters
    ----------
    dim : `Dimension`
        The Dimension to sample from. Can be categorical; evenly-spaced
        category indices are chosen in order without replacement (result
        may be smaller than `n_points`).

    n_points : int
        The number of points to sample from `dim`.

    Returns
    -------
    xi : np.array
        The sampled points in the Dimension. For Categorical
        dimensions, returns the index of the value in
        `dim.categories`.

    xi_transformed : np.array
        The transformed values of `xi`, for feeding to a model.
    """
    # Dimensions without a `categories` attribute are treated as numeric.
    cats = np.array(getattr(dim, 'categories', []), dtype=object)
    if len(cats):  # Sample categoricals while maintaining order
        xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points),
                         dtype=int)
        xi_transformed = dim.transform(cats[xi])
    else:
        bounds = dim.bounds
        xi = np.linspace(bounds[0], bounds[1], n_points)
        xi_transformed = dim.transform(xi)
    return xi, xi_transformed


def _cat_format(dimension, x, _):
    """Categorical axis tick formatter function.

    Returns the name of category `x` in `dimension`, or an empty string when
    `x` does not correspond to a category (ticks can fall outside the valid
    index range). Used with `matplotlib.ticker.FuncFormatter`.
    """
    index = int(x)
    # Guard both ends: a negative tick would otherwise wrap around to the
    # last category, and a too-large one would raise IndexError.
    if 0 <= index < len(dimension.categories):
        return str(dimension.categories[index])
    return ''


def _evaluate_min_params(result, params='result',
                         n_minimum_search=None,
                         random_state=None):
    """Return the parameter values selected by `params`.

    Parameters
    ----------
    result : `OptimizeResult`
        The optimization result to take the minimum from.

    params : str or list, default='result'
        - 'result': the best observed parameters, `result.x`;
        - 'expected_minimum': minimum found by `expected_minimum`
          (not available for spaces with categorical dimensions);
        - 'expected_minimum_random': minimum found by
          `expected_minimum_random_sampling`;
        - a list: returned unchanged after its length is checked against
          `result.x`.

    n_minimum_search : int, default=None
        Number of random starts for the two search modes. A falsy value
        selects the default: 20 for 'expected_minimum' and
        `10 ** len(result.x)` for 'expected_minimum_random'.

    random_state : default=None
        Passed through to the search functions.

    Returns
    -------
    x_vals : list
        The selected parameter values.

    Raises
    ------
    ValueError
        If `params` is an unknown string, is neither a string nor a list,
        or 'expected_minimum' is requested for a partly categorical space.
    AssertionError
        If `params` is a list whose length differs from `len(result.x)`.
    """
    if isinstance(params, list):
        # Raised explicitly so the check survives `python -O`; the exception
        # type is unchanged from the original `assert`.
        if len(params) != len(result.x):
            raise AssertionError(
                'A list of parameter values must have the same length as '
                'the number of dimensions (expected %d, got %d)'
                % (len(result.x), len(params)))
        return params

    if not isinstance(params, str):
        raise ValueError('Expected a string or a list of parameter values, '
                         'got %r' % (params,))

    if params == 'result':
        # Using the best observed result
        return result.x

    if params == 'expected_minimum':
        if result.space.is_partly_categorical:
            raise ValueError('expected_minimum does not support any '
                             'categorical values')
        # Gradient based minimum search using scipy's own minimizer,
        # 20 random starting points unless told otherwise.
        x_vals, _ = expected_minimum(
            result,
            n_random_starts=n_minimum_search or 20,
            random_state=random_state)
        return x_vals

    if params == 'expected_minimum_random':
        # Minimum search by evaluating the model at random sample points.
        # The default of 10^n_parameters becomes very slow for many
        # parameters.
        x_vals, _ = expected_minimum_random_sampling(
            result,
            n_random_starts=n_minimum_search or (10 ** len(result.x)),
            random_state=random_state)
        return x_vals

    raise ValueError("Unsupported value %r: must be a valid string "
                     "('result', 'expected_minimum' or "
                     "'expected_minimum_random')" % (params,))