"""Plotting functions."""
import sys
from collections import Counter
from functools import partial
from itertools import count

import numpy as np
from scipy.optimize import OptimizeResult

from skopt import expected_minimum, expected_minimum_random_sampling
from .acquisition import _gaussian_acquisition
from .space import Categorical
# For plot tests, matplotlib must be set to headless mode early
if 'pytest' in sys.modules:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from matplotlib.ticker import LogLocator
from matplotlib.ticker import MaxNLocator, FuncFormatter # noqa: E402
def plot_convergence(*args, **kwargs):
"""Plot one or several convergence traces.
Parameters
----------
args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple
The result(s) for which to plot the convergence trace.
- if `OptimizeResult`, then draw the corresponding single trace;
- if list of `OptimizeResult`, then draw the corresponding convergence
traces in transparency, along with the average convergence trace;
- if tuple, then `args[i][0]` should be a string label and `args[i][1]`
an `OptimizeResult` or a list of `OptimizeResult`.
ax : `Axes`, optional
The matplotlib axes on which to draw the plot, or `None` to create
a new one.
true_minimum : float, optional
The true minimum value of the function, if known.
yscale : None or string, optional
The scale for the y-axis.
Returns
-------
ax : `Axes`
The matplotlib axes.
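    Examples
    --------
    A minimal usage sketch; the quadratic objective and search-space
    below are illustrative only::

        from skopt import gp_minimize
        from skopt.plots import plot_convergence

        res = gp_minimize(lambda x: (x[0] - 2.0) ** 2, [(-5.0, 5.0)],
                          n_calls=15, random_state=0)
        plot_convergence(res)                    # single trace
        plot_convergence(("gp", [res, res]))     # labelled, averaged traces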
"""
# <3 legacy python
ax = kwargs.get("ax", None)
true_minimum = kwargs.get("true_minimum", None)
yscale = kwargs.get("yscale", None)
if ax is None:
ax = plt.gca()
ax.set_title("Convergence plot")
ax.set_xlabel("Number of calls $n$")
ax.set_ylabel(r"$\min f(x)$ after $n$ calls")
ax.grid()
if yscale is not None:
ax.set_yscale(yscale)
colors = cm.viridis(np.linspace(0.25, 1.0, len(args)))
for results, color in zip(args, colors):
if isinstance(results, tuple):
name, results = results
else:
name = None
if isinstance(results, OptimizeResult):
n_calls = len(results.x_iters)
mins = [np.min(results.func_vals[:i])
for i in range(1, n_calls + 1)]
ax.plot(range(1, n_calls + 1), mins, c=color,
marker=".", markersize=12, lw=2, label=name)
elif isinstance(results, list):
n_calls = len(results[0].x_iters)
iterations = range(1, n_calls + 1)
mins = [[np.min(r.func_vals[:i]) for i in iterations]
for r in results]
for m in mins:
ax.plot(iterations, m, c=color, alpha=0.2)
ax.plot(iterations, np.mean(mins, axis=0), c=color,
marker=".", markersize=12, lw=2, label=name)
    if true_minimum is not None:
        ax.axhline(true_minimum, linestyle="--",
                   color="r", lw=1,
                   label="True minimum")
    if true_minimum is not None or name:
        ax.legend(loc="best")
return ax
def plot_gaussian_process(res, **kwargs):
"""Plots the optimization results and the gaussian process
for 1-D objective functions.
Parameters
----------
res : `OptimizeResult`
The result for which to plot the gaussian process.
ax : `Axes`, optional
The matplotlib axes on which to draw the plot, or `None` to create
a new one.
n_calls : int, default: -1
Can be used to evaluate the model at call `n_calls`.
objective : func, default: None
Defines the true objective function. Must have one input parameter.
n_points : int, default: 1000
Number of data points used to create the plots
noise_level : float, default: 0
Sets the estimated noise level
show_legend : boolean, default: True
When True, a legend is plotted.
show_title : boolean, default: True
When True, a title containing the found minimum value
is shown
show_acq_func : boolean, default: False
When True, the acquisition function is plotted
show_next_point : boolean, default: False
When True, the next evaluated point is plotted
show_observations : boolean, default: True
When True, observations are plotted as dots.
show_mu : boolean, default: True
When True, the predicted model is shown.
Returns
-------
ax : `Axes`
The matplotlib axes.
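    Examples
    --------
    A minimal sketch with a noisy 1-D objective; the objective and
    noise level below are illustrative only::

        import numpy as np
        from skopt import gp_minimize
        from skopt.plots import plot_gaussian_process

        noise_level = 0.1

        def f(x):
            return np.sin(5 * x[0]) + noise_level * np.random.randn()

        res = gp_minimize(f, [(-2.0, 2.0)], n_calls=15,
                          noise=noise_level ** 2, random_state=0)
        ax = plot_gaussian_process(res, noise_level=noise_level,
                                   show_acq_func=True,
                                   show_next_point=True)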
"""
ax = kwargs.get("ax", None)
n_calls = kwargs.get("n_calls", -1)
objective = kwargs.get("objective", None)
noise_level = kwargs.get("noise_level", 0)
show_legend = kwargs.get("show_legend", True)
show_title = kwargs.get("show_title", True)
show_acq_func = kwargs.get("show_acq_func", False)
show_next_point = kwargs.get("show_next_point", False)
show_observations = kwargs.get("show_observations", True)
show_mu = kwargs.get("show_mu", True)
n_points = kwargs.get("n_points", 1000)
if ax is None:
ax = plt.gca()
n_dims = res.space.n_dims
assert n_dims == 1, "Space dimension must be 1"
dimension = res.space.dimensions[0]
x, x_model = _evenly_sample(dimension, n_points)
x = x.reshape(-1, 1)
x_model = x_model.reshape(-1, 1)
if res.specs is not None and "args" in res.specs:
n_random = res.specs["args"].get('n_random_starts', None)
acq_func = res.specs["args"].get("acq_func", "EI")
acq_func_kwargs = res.specs["args"].get("acq_func_kwargs", {})
if acq_func_kwargs is None:
acq_func_kwargs = {}
if acq_func is None or acq_func == "gp_hedge":
acq_func = "EI"
if n_random is None:
n_random = len(res.x_iters) - len(res.models)
if objective is not None:
fx = np.array([objective(x_i) for x_i in x])
if n_calls < 0:
model = res.models[-1]
curr_x_iters = res.x_iters
curr_func_vals = res.func_vals
else:
model = res.models[n_calls]
curr_x_iters = res.x_iters[:n_random + n_calls]
curr_func_vals = res.func_vals[:n_random + n_calls]
# Plot true function.
if objective is not None:
ax.plot(x, fx, "r--", label="True (unknown)")
ax.fill(np.concatenate(
[x, x[::-1]]),
np.concatenate(([fx_i - 1.9600 * noise_level
for fx_i in fx],
[fx_i + 1.9600 * noise_level
for fx_i in fx[::-1]])),
alpha=.2, fc="r", ec="None")
# Plot GP(x) + contours
if show_mu:
y_pred, sigma = model.predict(x_model, return_std=True)
ax.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$")
ax.fill(np.concatenate([x, x[::-1]]),
np.concatenate([y_pred - 1.9600 * sigma,
(y_pred + 1.9600 * sigma)[::-1]]),
alpha=.2, fc="g", ec="None")
# Plot sampled points
if show_observations:
ax.plot(curr_x_iters, curr_func_vals,
"r.", markersize=8, label="Observations")
if (show_mu or show_observations or objective is not None)\
and show_acq_func:
ax_ei = ax.twinx()
ax_ei.set_ylabel(str(acq_func) + "(x)")
plot_both = True
else:
ax_ei = ax
plot_both = False
if show_acq_func:
acq = _gaussian_acquisition(x_model, model,
y_opt=np.min(curr_func_vals),
acq_func=acq_func,
acq_func_kwargs=acq_func_kwargs)
next_x = x[np.argmin(acq)]
next_acq = acq[np.argmin(acq)]
        acq = -acq
next_acq = -next_acq
ax_ei.plot(x, acq, "b", label=str(acq_func) + "(x)")
if not plot_both:
ax_ei.fill_between(x.ravel(), 0, acq.ravel(),
alpha=0.3, color='blue')
if show_next_point and next_x is not None:
ax_ei.plot(next_x, next_acq, "bo", markersize=6,
label="Next query point")
if show_title:
ax.set_title(r"x* = %.4f, f(x*) = %.4f" % (res.x[0], res.fun))
# Adjust plot layout
ax.grid()
ax.set_xlabel("x")
ax.set_ylabel("f(x)")
if show_legend:
if plot_both:
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax_ei.get_legend_handles_labels()
ax_ei.legend(lines + lines2, labels + labels2, loc="best",
prop={'size': 6}, numpoints=1)
else:
ax.legend(loc="best", prop={'size': 6}, numpoints=1)
return ax
def plot_regret(*args, **kwargs):
"""Plot one or several cumulative regret traces.
Parameters
----------
args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple
The result(s) for which to plot the cumulative regret trace.
- if `OptimizeResult`, then draw the corresponding single trace;
- if list of `OptimizeResult`, then draw the corresponding cumulative
regret traces in transparency, along with the average cumulative
regret trace;
- if tuple, then `args[i][0]` should be a string label and `args[i][1]`
an `OptimizeResult` or a list of `OptimizeResult`.
    ax : `Axes`, optional
The matplotlib axes on which to draw the plot, or `None` to create
a new one.
true_minimum : float, optional
The true minimum value of the function, if known.
yscale : None or string, optional
The scale for the y-axis.
Returns
-------
ax : `Axes`
The matplotlib axes.
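    Examples
    --------
    A minimal sketch, reusing an illustrative 1-D run::

        from skopt import gp_minimize
        from skopt.plots import plot_regret

        res = gp_minimize(lambda x: (x[0] - 2.0) ** 2, [(-5.0, 5.0)],
                          n_calls=15, random_state=0)
        ax = plot_regret(("gp", res), true_minimum=0.0)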
"""
# <3 legacy python
ax = kwargs.get("ax", None)
true_minimum = kwargs.get("true_minimum", None)
yscale = kwargs.get("yscale", None)
if ax is None:
ax = plt.gca()
ax.set_title("Cumulative regret plot")
ax.set_xlabel("Number of calls $n$")
ax.set_ylabel(r"$\sum_{i=0}^n(f(x_i) - optimum)$ after $n$ calls")
ax.grid()
if yscale is not None:
ax.set_yscale(yscale)
colors = cm.viridis(np.linspace(0.25, 1.0, len(args)))
if true_minimum is None:
results = []
for res in args:
if isinstance(res, tuple):
res = res[1]
if isinstance(res, OptimizeResult):
results.append(res)
elif isinstance(res, list):
results.extend(res)
true_minimum = np.min([np.min(r.func_vals) for r in results])
for results, color in zip(args, colors):
if isinstance(results, tuple):
name, results = results
else:
name = None
if isinstance(results, OptimizeResult):
n_calls = len(results.x_iters)
regrets = [np.sum(results.func_vals[:i] - true_minimum)
for i in range(1, n_calls + 1)]
ax.plot(range(1, n_calls + 1), regrets, c=color,
marker=".", markersize=12, lw=2, label=name)
elif isinstance(results, list):
n_calls = len(results[0].x_iters)
iterations = range(1, n_calls + 1)
regrets = [[np.sum(r.func_vals[:i] - true_minimum) for i in
iterations] for r in results]
for cr in regrets:
ax.plot(iterations, cr, c=color, alpha=0.2)
ax.plot(iterations, np.mean(regrets, axis=0), c=color,
marker=".", markersize=12, lw=2, label=name)
if name:
ax.legend(loc="best")
return ax
def _format_scatter_plot_axes(ax, space, ylabel, plot_dims,
dim_labels=None):
# Work out min, max of y axis for the diagonal so we can adjust
# them all to the same value
diagonal_ylim = _get_ylim_diagonal(ax)
# Number of search-space dimensions we are using.
if isinstance(ax, (list, np.ndarray)):
n_dims = len(plot_dims)
else:
n_dims = 1
if dim_labels is None:
dim_labels = ["$X_{%i}$" % i if d.name is None else d.name
for i, d in plot_dims]
# Axes for categorical dimensions are really integers; we have to
# label them with the category names
iscat = [isinstance(dim[1], Categorical) for dim in plot_dims]
# Deal with formatting of the axes
for i in range(n_dims): # rows
for j in range(n_dims): # columns
if n_dims > 1:
ax_ = ax[i, j]
else:
ax_ = ax
index_i, dim_i = plot_dims[i]
index_j, dim_j = plot_dims[j]
if j > i:
ax_.axis("off")
elif i > j: # off-diagonal plots
# plots on the diagonal are special, like Texas. They have
# their own range so do not mess with them.
if not iscat[i]: # bounds not meaningful for categoricals
ax_.set_ylim(*dim_i.bounds)
if iscat[j]:
# partial() avoids creating closures in a loop
ax_.xaxis.set_major_formatter(FuncFormatter(
partial(_cat_format, dim_j)))
else:
ax_.set_xlim(*dim_j.bounds)
if j == 0: # only leftmost column (0) gets y labels
ax_.set_ylabel(dim_labels[i])
if iscat[i]: # Set category labels for left column
ax_.yaxis.set_major_formatter(FuncFormatter(
partial(_cat_format, dim_i)))
else:
ax_.set_yticklabels([])
# for all rows except ...
if i < n_dims - 1:
ax_.set_xticklabels([])
# ... the bottom row
else:
                    for label in ax_.get_xticklabels():
                        label.set_rotation(45)
ax_.set_xlabel(dim_labels[j])
# configure plot for linear vs log-scale
if dim_j.prior == 'log-uniform':
ax_.set_xscale('log')
else:
ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both',
integer=iscat[j]))
if dim_i.prior == 'log-uniform':
ax_.set_yscale('log')
else:
ax_.yaxis.set_major_locator(MaxNLocator(6, prune='both',
integer=iscat[i]))
else: # diagonal plots
ax_.set_ylim(*diagonal_ylim)
if not iscat[i]:
low, high = dim_i.bounds
ax_.set_xlim(low, high)
ax_.yaxis.tick_right()
ax_.yaxis.set_label_position('right')
ax_.yaxis.set_ticks_position('both')
ax_.set_ylabel(ylabel)
ax_.xaxis.tick_top()
ax_.xaxis.set_label_position('top')
ax_.set_xlabel(dim_labels[j])
if dim_i.prior == 'log-uniform':
ax_.set_xscale('log')
else:
ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both',
integer=iscat[i]))
if iscat[i]:
ax_.xaxis.set_major_formatter(FuncFormatter(
partial(_cat_format, dim_i)))
return ax
def partial_dependence(space, model, i, j=None, sample_points=None,
n_samples=250, n_points=40, x_eval=None):
"""Calculate the partial dependence for dimensions `i` and `j` with
respect to the objective value, as approximated by `model`.
The partial dependence plot shows how the value of the dimensions
`i` and `j` influence the `model` predictions after "averaging out"
the influence of all other dimensions.
When `x_eval` is not `None`, the given values are used instead of
random samples. In this case, `n_samples` will be ignored.
Parameters
----------
space : `Space`
The parameter space over which the minimization was performed.
model
Surrogate model for the objective function.
i : int
The first dimension for which to calculate the partial dependence.
j : int, default=None
The second dimension for which to calculate the partial dependence.
To calculate the 1D partial dependence on `i` alone set `j=None`.
sample_points : np.array, shape=(n_points, n_dims), default=None
        Only used when `x_eval=None`, i.e. when the partial dependence
        should be calculated.
Randomly sampled and transformed points to use when averaging
the model function at each of the `n_points` when using partial
dependence.
    n_samples : int, default=250
Number of random samples to use for averaging the model function
at each of the `n_points` when using partial dependence. Only used
when `sample_points=None` and `x_eval=None`.
n_points : int, default=40
Number of points at which to evaluate the partial dependence
along each dimension `i` and `j`.
x_eval : list, default=None
        `x_eval` is a list of parameter values or None. In case `x_eval`
        is not None, the partial dependence will be calculated using these
        values.
        Otherwise, randomly selected samples will be used.
Returns
-------
For 1D partial dependence:
xi : np.array
The points at which the partial dependence was evaluated.
yi : np.array
The value of the model at each point `xi`.
For 2D partial dependence:
    xi : np.array, shape=n_points
        The points at which the partial dependence was evaluated
        along dimension `j`.
    yi : np.array, shape=n_points
        The points at which the partial dependence was evaluated
        along dimension `i`.
zi : np.array, shape=(n_points, n_points)
The value of the model at each point `(xi, yi)`.
For Categorical variables, the `xi` (and `yi` for 2D) returned are
the indices of the variable in `Dimension.categories`.
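    Examples
    --------
    A sketch evaluating the 1-D and 2-D partial dependence from the
    last surrogate model of an illustrative 2-D run::

        from skopt import gp_minimize
        from skopt.plots import partial_dependence

        res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2,
                          [(-2.0, 2.0), (-2.0, 2.0)],
                          n_calls=15, random_state=0)
        xi, yi = partial_dependence(res.space, res.models[-1], i=0,
                                    n_samples=50, n_points=20)
        xi, yi, zi = partial_dependence(res.space, res.models[-1], i=0,
                                        j=1, n_samples=50, n_points=20)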
"""
    # If no x_eval list was passed, use randomly sampled values instead
if x_eval is None and sample_points is None:
sample_points = space.transform(space.rvs(n_samples=n_samples))
elif sample_points is None:
sample_points = space.transform([x_eval])
if j is None:
return partial_dependence_1D(space, model, i,
sample_points, n_points)
else:
return partial_dependence_2D(space, model, i, j,
sample_points, n_points)
def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
zscale='linear', dimensions=None, sample_source='random',
minimum='result', n_minimum_search=None, plot_dims=None,
show_points=True, cmap='viridis_r'):
"""Plot a 2-d matrix with so-called Partial Dependence plots
of the objective function. This shows the influence of each
search-space dimension on the objective function.
This uses the last fitted model for estimating the objective function.
The diagonal shows the effect of a single dimension on the
objective function, while the plots below the diagonal show
the effect on the objective function when varying two dimensions.
The Partial Dependence is calculated by averaging the objective value
for a number of random samples in the search-space,
while keeping one or two dimensions fixed at regular intervals. This
averages out the effect of varying the other dimensions and shows
the influence of one or two dimensions on the objective function.
Also shown are small black dots for the points that were sampled
during optimization.
    By default a red star indicates the best observed minimum, but
    this can be changed via the `minimum` argument.
.. note::
The Partial Dependence plot is only an estimation of the surrogate
model which in turn is only an estimation of the true objective
function that has been optimized. This means the plots show
an "estimate of an estimate" and may therefore be quite imprecise,
especially if few samples have been collected during the
optimization
(e.g. less than 100-200 samples), and in regions of the search-space
that have been sparsely sampled (e.g. regions away from the optimum).
This means that the plots may change each time you run the
optimization and they should not be considered completely reliable.
These compromises are necessary because we cannot evaluate the
expensive objective function in order to plot it, so we have to use
the cheaper surrogate model to plot its contour. And in order to
show search-spaces with 3 dimensions or more in a 2-dimensional
plot,
we further need to map those dimensions to only 2-dimensions using
the Partial Dependence, which also causes distortions in the plots.
Parameters
----------
result : `OptimizeResult`
The optimization results from calling e.g. `gp_minimize()`.
levels : int, default=10
Number of levels to draw on the contour plot, passed directly
to `plt.contourf()`.
n_points : int, default=40
Number of points at which to evaluate the partial dependence
along each dimension.
n_samples : int, default=250
Number of samples to use for averaging the model function
at each of the `n_points` when `sample_method` is set to 'random'.
size : float, default=2
Height (in inches) of each facet.
zscale : str, default='linear'
Scale to use for the z axis of the contour plots. Either 'linear'
or 'log'.
dimensions : list of str, default=None
Labels of the dimension
variables. `None` defaults to `space.dimensions[i].name`, or
if also `None` to `['X_0', 'X_1', ..]`.
plot_dims : list of str and int, default=None
List of dimension names or dimension indices from the
search-space dimensions to be included in the plot.
If `None` then use all dimensions except constant ones
from the search-space.
sample_source : str or list of floats, default='random'
        Defines the samples to use for averaging the model function
        at each of the `n_points`.
        A partial dependence plot is only generated when `sample_source`
        is set to 'random' and `n_samples` is sufficient.
        `sample_source` can also be a list of
        floats, which is then used for averaging.
        Valid strings:
        - 'random' - `n_samples` random samples will be used
        - 'result' - Use only the best observed parameters
        - 'expected_minimum' - Parameters that give the best
          minimum, calculated using scipy's minimize method.
          This method currently does not work with categorical values.
        - 'expected_minimum_random' - Parameters that give the
          best minimum when using naive random sampling.
          Works with categorical values.
    minimum : str or list of floats, default='result'
        Defines the values for the red points in the plots.
        Valid strings:
        - 'result' - Use the best observed parameters
        - 'expected_minimum' - Parameters that give the best
          minimum, calculated using scipy's minimize method.
          This method currently does not work with categorical values.
        - 'expected_minimum_random' - Parameters that give the
          best minimum when using naive random sampling.
          Works with categorical values.
    n_minimum_search : int, default=None
        Determines how many points should be evaluated
        to find the minimum when using 'expected_minimum' or
        'expected_minimum_random'. Parameter is used when
        `sample_source` and/or `minimum` is set to
        'expected_minimum' or 'expected_minimum_random'.
    show_points : bool, default=True
        Choose whether to show evaluated points in the
        contour plots.
    cmap : str or Colormap, default='viridis_r'
        Color map for contour plots. Passed directly to
        `plt.contourf()`.
Returns
-------
ax : `Matplotlib.Axes`
A 2-d matrix of Axes-objects with the sub-plots.
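    Examples
    --------
    A minimal sketch for an illustrative 2-D objective::

        from skopt import gp_minimize
        from skopt.plots import plot_objective

        res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2,
                          [(-2.0, 2.0), (-2.0, 2.0)],
                          n_calls=15, random_state=0)
        ax = plot_objective(res, n_points=20, n_samples=50)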
"""
# Here we define the values for which to plot the red dot (2d plot) and
# the red dotted line (1d plot).
# These same values will be used for evaluating the plots when
# calculating dependence. (Unless partial
# dependence is to be used instead).
space = result.space
# Get the relevant search-space dimensions.
if plot_dims is None:
# Get all dimensions.
plot_dims = []
for row in range(space.n_dims):
if space.dimensions[row].is_constant:
continue
plot_dims.append((row, space.dimensions[row]))
else:
plot_dims = space[plot_dims]
# Number of search-space dimensions we are using.
n_dims = len(plot_dims)
if dimensions is not None:
assert len(dimensions) == n_dims
x_vals = _evaluate_min_params(result, minimum, n_minimum_search)
if sample_source == "random":
x_eval = None
samples = space.transform(space.rvs(n_samples=n_samples))
else:
x_eval = _evaluate_min_params(result, sample_source,
n_minimum_search)
samples = space.transform([x_eval])
x_samples, minimum, _ = _map_categories(space, result.x_iters, x_vals)
if zscale == 'log':
locator = LogLocator()
elif zscale == 'linear':
locator = None
else:
raise ValueError("Valid values for zscale are 'linear' and 'log',"
" not '%s'." % zscale)
fig, ax = plt.subplots(n_dims, n_dims,
figsize=(size * n_dims, size * n_dims))
fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
hspace=0.1, wspace=0.1)
for i in range(n_dims):
for j in range(n_dims):
if i == j:
index, dim = plot_dims[i]
xi, yi = partial_dependence_1D(space, result.models[-1],
index,
samples=samples,
n_points=n_points)
if n_dims > 1:
ax_ = ax[i, i]
else:
ax_ = ax
ax_.plot(xi, yi)
ax_.axvline(minimum[index], linestyle="--", color="r", lw=1)
# lower triangle
elif i > j:
index1, dim1 = plot_dims[i]
index2, dim2 = plot_dims[j]
ax_ = ax[i, j]
xi, yi, zi = partial_dependence_2D(space, result.models[-1],
index1, index2,
samples, n_points)
ax_.contourf(xi, yi, zi, levels,
locator=locator, cmap=cmap)
if show_points:
ax_.scatter(x_samples[:, index2], x_samples[:, index1],
c='k', s=10, lw=0.)
ax_.scatter(minimum[index2], minimum[index1],
c=['r'], s=100, lw=0., marker='*')
ylabel = "Partial dependence"
# Make various adjustments to the plots.
return _format_scatter_plot_axes(ax, space, ylabel=ylabel,
plot_dims=plot_dims,
dim_labels=dimensions)
def plot_evaluations(result, bins=20, dimensions=None,
plot_dims=None):
"""Visualize the order in which points were sampled during optimization.
This creates a 2-d matrix plot where the diagonal plots are histograms
that show the distribution of samples for each search-space dimension.
The plots below the diagonal are scatter-plots of the samples for
all combinations of search-space dimensions.
The order in which samples
were evaluated is encoded in each point's color.
A red star shows the best found parameters.
Parameters
----------
result : `OptimizeResult`
The optimization results from calling e.g. `gp_minimize()`.
    bins : int, default=20
Number of bins to use for histograms on the diagonal.
dimensions : list of str, default=None
Labels of the dimension
variables. `None` defaults to `space.dimensions[i].name`, or
if also `None` to `['X_0', 'X_1', ..]`.
plot_dims : list of str and int, default=None
List of dimension names or dimension indices from the
search-space dimensions to be included in the plot.
If `None` then use all dimensions except constant ones
from the search-space.
Returns
-------
ax : `Matplotlib.Axes`
A 2-d matrix of Axes-objects with the sub-plots.
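    Examples
    --------
    A minimal sketch for an illustrative 2-D objective::

        from skopt import gp_minimize
        from skopt.plots import plot_evaluations

        res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2,
                          [(-2.0, 2.0), (-2.0, 2.0)],
                          n_calls=15, random_state=0)
        ax = plot_evaluations(res, bins=10)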
"""
space = result.space
# Convert categoricals to integers, so we can ensure consistent ordering.
# Assign indices to categories in the order they appear in the Dimension.
# Matplotlib's categorical plotting functions are only present in v 2.1+,
# and may order categoricals differently in different plots anyway.
samples, minimum, iscat = _map_categories(space, result.x_iters, result.x)
order = range(samples.shape[0])
if plot_dims is None:
# Get all dimensions.
plot_dims = []
for row in range(space.n_dims):
if space.dimensions[row].is_constant:
continue
plot_dims.append((row, space.dimensions[row]))
else:
plot_dims = space[plot_dims]
# Number of search-space dimensions we are using.
n_dims = len(plot_dims)
if dimensions is not None:
assert len(dimensions) == n_dims
fig, ax = plt.subplots(n_dims, n_dims,
figsize=(2 * n_dims, 2 * n_dims))
fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
hspace=0.1, wspace=0.1)
for i in range(n_dims):
for j in range(n_dims):
if i == j:
index, dim = plot_dims[i]
if iscat[j]:
bins_ = len(dim.categories)
elif dim.prior == 'log-uniform':
low, high = space.bounds[index]
bins_ = np.logspace(np.log10(low), np.log10(high), bins)
else:
bins_ = bins
if n_dims == 1:
ax_ = ax
else:
ax_ = ax[i, i]
ax_.hist(samples[:, index], bins=bins_,
range=None if iscat[j] else dim.bounds)
# lower triangle
elif i > j:
index_i, dim_i = plot_dims[i]
index_j, dim_j = plot_dims[j]
ax_ = ax[i, j]
ax_.scatter(samples[:, index_j], samples[:, index_i],
c=order, s=40, lw=0., cmap='viridis')
ax_.scatter(minimum[index_j], minimum[index_i],
c=['r'], s=100, lw=0., marker='*')
# Make various adjustments to the plots.
return _format_scatter_plot_axes(ax, space, ylabel="Number of samples",
plot_dims=plot_dims,
dim_labels=dimensions)
def _get_ylim_diagonal(ax):
"""Get the min / max of the ylim for all diagonal plots.
    This is used in `_format_scatter_plot_axes()` so the ylim is the
    same for all diagonal plots.
Parameters
----------
ax : `Matplotlib.Axes`
2-dimensional matrix with Matplotlib Axes objects.
Returns
-------
    ylim_diagonal : tuple(float)
The common min and max ylim for the diagonal plots.
"""
# Number of search-space dimensions used in this plot.
if isinstance(ax, (list, np.ndarray)):
n_dims = len(ax)
# Get ylim for all diagonal plots.
ylim = [ax[row, row].get_ylim() for row in range(n_dims)]
else:
ylim = [ax.get_ylim()]
# Separate into two lists with low and high ylim.
ylim_lo, ylim_hi = zip(*ylim)
# Min and max ylim for all diagonal plots.
ylim_min = np.min(ylim_lo)
ylim_max = np.max(ylim_hi)
return ylim_min, ylim_max
def partial_dependence_1D(space, model, i, samples,
n_points=40):
"""
Calculate the partial dependence for a single dimension.
This uses the given model to calculate the average objective value
for all the samples, where the given dimension is fixed at
regular intervals between its bounds.
This shows how the given dimension affects the objective value
when the influence of all other dimensions are averaged out.
Parameters
----------
space : `Space`
The parameter space over which the minimization was performed.
model
Surrogate model for the objective function.
i : int
The dimension for which to calculate the partial dependence.
    samples : np.array, shape=(n_samples, n_dims)
Randomly sampled and transformed points to use when averaging
the model function at each of the `n_points` when using partial
dependence.
n_points : int, default=40
Number of points at which to evaluate the partial dependence
along each dimension `i`.
Returns
-------
xi : np.array
The points at which the partial dependence was evaluated.
yi : np.array
The average value of the modelled objective function at
each point `xi`.
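    Examples
    --------
    A sketch using randomly drawn, transformed samples from an
    illustrative 1-D run::

        from skopt import gp_minimize
        from skopt.plots import partial_dependence_1D

        res = gp_minimize(lambda x: x[0] ** 2, [(-2.0, 2.0)],
                          n_calls=15, random_state=0)
        samples = res.space.transform(res.space.rvs(n_samples=50))
        xi, yi = partial_dependence_1D(res.space, res.models[-1], 0,
                                       samples, n_points=20)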
"""
# The idea is to step through one dimension, evaluating the model with
# that dimension fixed and averaging either over random values or over
# the given ones in x_val in all other dimensions.
# (Or step through 2 dimensions when i and j are given.)
# Categorical dimensions make this interesting, because they are one-
# hot-encoded, so there is a one-to-many mapping of input dimensions
# to transformed (model) dimensions.
# dim_locs[i] is the (column index of the) start of dim i in
# sample_points.
    # This is useful when we are using one-hot encoding, i.e. using
    # categorical values.
dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])
def _calc(x):
"""
Helper-function to calculate the average predicted
objective value for the given model, when setting
the index'th dimension of the search-space to the value x,
and then averaging over all samples.
"""
rvs_ = np.array(samples) # copy
# We replace the values in the dimension that we want to keep
# fixed
rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x
        # In case of `x_eval=None` rvs consists of random samples.
# Calculating the mean of these samples is how partial dependence
# is implemented.
return np.mean(model.predict(rvs_))
xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points)
# Calculate the partial dependence for all the points.
yi = [_calc(x) for x in xi_transformed]
return xi, yi
def partial_dependence_2D(space, model, i, j, samples,
n_points=40):
"""
Calculate the partial dependence for two dimensions in the search-space.
This uses the given model to calculate the average objective value
for all the samples, where the given dimensions are fixed at
regular intervals between their bounds.
This shows how the given dimensions affect the objective value
when the influence of all other dimensions are averaged out.
Parameters
----------
space : `Space`
The parameter space over which the minimization was performed.
model
Surrogate model for the objective function.
i : int
The first dimension for which to calculate the partial dependence.
j : int
The second dimension for which to calculate the partial dependence.
    samples : np.array, shape=(n_samples, n_dims)
Randomly sampled and transformed points to use when averaging
the model function at each of the `n_points` when using partial
dependence.
n_points : int, default=40
Number of points at which to evaluate the partial dependence
along each dimension `i` and `j`.
Returns
-------
    xi : np.array, shape=n_points
        The points at which the partial dependence was evaluated
        along dimension `j`.
    yi : np.array, shape=n_points
        The points at which the partial dependence was evaluated
        along dimension `i`.
zi : np.array, shape=(n_points, n_points)
The average value of the objective function at each point `(xi, yi)`.
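    Examples
    --------
    A sketch using randomly drawn, transformed samples from an
    illustrative 2-D run::

        from skopt import gp_minimize
        from skopt.plots import partial_dependence_2D

        res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2,
                          [(-2.0, 2.0), (-2.0, 2.0)],
                          n_calls=15, random_state=0)
        samples = res.space.transform(res.space.rvs(n_samples=50))
        xi, yi, zi = partial_dependence_2D(res.space, res.models[-1],
                                           0, 1, samples, n_points=20)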
"""
# The idea is to step through one dimension, evaluating the model with
# that dimension fixed and averaging either over random values or over
# the given ones in x_val in all other dimensions.
# (Or step through 2 dimensions when i and j are given.)
# Categorical dimensions make this interesting, because they are one-
# hot-encoded, so there is a one-to-many mapping of input dimensions
# to transformed (model) dimensions.
# dim_locs[i] is the (column index of the) start of dim i in
# sample_points.
    # This is useful when we are using one-hot encoding, i.e. using
    # categorical values.
dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])
def _calc(x, y):
"""
Helper-function to calculate the average predicted
objective value for the given model, when setting
the index1'th dimension of the search-space to the value x
and setting the index2'th dimension to the value y,
and then averaging over all samples.
"""
rvs_ = np.array(samples) # copy
rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x
rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y
return np.mean(model.predict(rvs_))
xi, xi_transformed = _evenly_sample(space.dimensions[j], n_points)
yi, yi_transformed = _evenly_sample(space.dimensions[i], n_points)
# Calculate the partial dependence for all combinations of these points.
zi = [[_calc(x, y) for x in xi_transformed] for y in yi_transformed]
# Convert list-of-list to a numpy array.
zi = np.array(zi)
return xi, yi, zi
def plot_objective_2D(result, dimension_identifier1, dimension_identifier2,
n_points=40, n_samples=250, levels=10, zscale='linear',
sample_source='random',
minimum='result', n_minimum_search=None, ax=None):
"""
Create and return a Matplotlib figure and axes with a landscape
contour-plot of the last fitted model of the search-space,
overlaid with all the samples from the optimization results,
for the two given dimensions of the search-space.
    This is similar to `plot_objective()` but only for 2 dimensions;
    see that function's doc-string for a more extensive explanation.
Parameters
----------
result : `OptimizeResult`
The optimization results e.g. from calling `gp_minimize()`.
dimension_identifier1 : str or int
Name or index of a dimension in the search-space.
dimension_identifier2 : str or int
Name or index of a dimension in the search-space.
n_samples : int, default=250
Number of random samples used for estimating the contour-plot
of the objective function.
n_points : int, default=40
Number of points along each dimension where the partial dependence
is evaluated when generating the contour-plots.
levels : int, default=10
Number of levels to draw on the contour plot.
    zscale : str, default='linear'
        Scale to use for the z axis of the contour plots.
        Either 'linear' or 'log'.
    sample_source : str or list of floats, default='random'
        Defines the samples to use for averaging the model function
        at each of the `n_points`. See `plot_objective()` for details.
    minimum : str or list of floats, default='result'
        Defines the values for the red star in the plot.
        See `plot_objective()` for details.
    n_minimum_search : int, default=None
        Determines how many points should be evaluated to find the
        minimum when using 'expected_minimum' or
        'expected_minimum_random'.
    ax : `Matplotlib.Axes`, default: None
        When set, everything is plotted inside this axis.
Returns
-------
ax : `Matplotlib.Axes`
        The Matplotlib Axes-object with the contour-plot.
        For example, you can save the plot by calling
        `ax.figure.savefig('file.png')`
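    Examples
    --------
    A minimal sketch with named dimensions; the names and objective
    below are illustrative only::

        from skopt import gp_minimize
        from skopt.plots import plot_objective_2D
        from skopt.space import Real

        space = [Real(-2.0, 2.0, name='a'), Real(-2.0, 2.0, name='b')]
        res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2, space,
                          n_calls=15, random_state=0)
        ax = plot_objective_2D(res, 'a', 'b', n_points=20, n_samples=50)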
"""
# Get the search-space instance from the optimization results.
space = result.space
x_vals = _evaluate_min_params(result, minimum, n_minimum_search)
if sample_source == "random":
x_eval = None
samples = space.transform(space.rvs(n_samples=n_samples))
else:
x_eval = _evaluate_min_params(result, sample_source,
n_minimum_search)
samples = space.transform([x_eval])
x_samples, x_minimum, _ = _map_categories(space, result.x_iters, x_vals)
# Get the dimension-object, its index in the search-space, and its name.
index1, dimension1 = space[dimension_identifier1]
index2, dimension2 = space[dimension_identifier2]
    # Get the samples from the optimization-log for the relevant dimensions.
    samples1 = x_samples[:, index1]
    samples2 = x_samples[:, index2]
# Get the best-found samples for the relevant dimensions.
best_sample1 = x_minimum[index1]
best_sample2 = x_minimum[index2]
# Get the last fitted model for the search-space.
last_model = result.models[-1]
# Estimate the objective function for these sampled points
# using the last fitted model for the search-space.
xi, yi, zi = partial_dependence_2D(space, last_model, index2, index1,
samples, n_points=n_points)
if ax is None:
ax = plt.gca()
# Scale for the z-axis of the contour-plot. Either Log or Linear (None).
locator = LogLocator() if zscale == 'log' else None
# Plot the contour-landscape for the objective function.
ax.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r')
# Plot all the parameters that were sampled during optimization.
# These are plotted as small black dots.
ax.scatter(samples1, samples2, c='black', s=10, linewidths=1)
# Plot the best parameters that were sampled during optimization.
# These are plotted as a big red star.
ax.scatter(best_sample1, best_sample2,
c='red', s=50, linewidths=1, marker='*')
# Use the dimension-names as the labels for the plot-axes.
ax.set_xlabel(dimension1.name)
ax.set_ylabel(dimension2.name)
ax.autoscale(enable=True, axis='x', tight=True)
ax.autoscale(enable=True, axis='y', tight=True)
# Use log-scale on the x-axis?
if dimension1.prior == 'log-uniform':
ax.set_xscale('log')
# Use log-scale on the y-axis?
if dimension2.prior == 'log-uniform':
ax.set_yscale('log')
return ax
def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0,
ax=None):
"""
Create and return a Matplotlib figure with a histogram
of the samples from the optimization results,
for a given dimension of the search-space.
Parameters
----------
result : `OptimizeResult`
The optimization results e.g. from calling `gp_minimize()`.
dimension_identifier : str or int
Name or index of a dimension in the search-space.
    bins : int, default=20
        Number of bins in the histogram.
    rotate_labels : int, default=0
        Degree to rotate category-names on the x-axis.
        Only used for Categorical dimensions.
    ax : `Matplotlib.Axes`, default: None
        When set, everything is plotted inside this axis.
Returns
-------
ax : `Matplotlib.Axes`
The Matplotlib Axes-object.
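    Examples
    --------
    A minimal sketch with a categorical dimension; the search-space and
    objective below are illustrative only::

        from skopt import gp_minimize
        from skopt.plots import plot_histogram
        from skopt.space import Categorical, Real

        space = [Real(-2.0, 2.0, name='x'),
                 Categorical(['a', 'b', 'c'], name='kind')]
        res = gp_minimize(
            lambda p: p[0] ** 2 + {'a': 0.0, 'b': 1.0, 'c': 2.0}[p[1]],
            space, n_calls=15, random_state=0)
        ax = plot_histogram(res, 'kind', rotate_labels=45)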
"""
# Get the search-space instance from the optimization results.
space = result.space
# Get the dimension-object.
index, dimension = space[dimension_identifier]
# Get the samples from the optimization-log for that particular dimension.
samples = [x[index] for x in result.x_iters]
if ax is None:
ax = plt.gca()
if isinstance(dimension, Categorical):
# When the search-space dimension is Categorical, it means
# that the possible values are strings. Matplotlib's histogram
# does not support this, so we have to make a bar-plot instead.
# NOTE: This only shows the categories that are in the samples.
# So if a category was not sampled, it will not be shown here.
# Count the number of occurrences of the string-categories.
counter = Counter(samples)
# The counter returns a dict where the keys are the category-names
# and the values are the number of occurrences for each category.
names = list(counter.keys())
counts = list(counter.values())
# Although Matplotlib's docs indicate that the bar() function
# can take a list of strings for the x-axis, it doesn't appear to work.
# So we hack it by creating a list of integers and setting the
# tick-labels with the category-names instead.
x = np.arange(len(counts))
# Plot using bars.
ax.bar(x, counts, tick_label=names)
# Adjust the rotation of the category-names on the x-axis.
ax.set_xticklabels(labels=names, rotation=rotate_labels)
else:
# Otherwise the search-space Dimension is either integer or float,
# in which case the histogram can be plotted more easily.
if dimension.prior == 'log-uniform':
# Map the number of bins to a log-space for the dimension bounds.
bins_mapped = np.logspace(*np.log10(dimension.bounds), bins)
else:
# Use the original number of bins.
bins_mapped = bins
# Plot the histogram.
ax.hist(samples, bins=bins_mapped, range=dimension.bounds)
# Use log-scale on the x-axis?
if dimension.prior == 'log-uniform':
ax.set_xscale('log')
# Set the labels.
ax.set_xlabel(dimension.name)
ax.set_ylabel('Sample Count')
return ax
def _map_categories(space, points, minimum):
"""
Map categorical values to integers in a set of points.
Returns
-------
mapped_points : np.array, shape=points.shape
A copy of `points` with categoricals replaced with their indices in
the corresponding `Dimension`.
mapped_minimum : np.array, shape (space.n_dims,)
A copy of `minimum` with categoricals replaced with their indices in
the corresponding `Dimension`.
iscat : np.array, shape (space.n_dims,)
Boolean array indicating whether dimension `i` in the `space` is
categorical.
"""
points = np.asarray(points, dtype=object) # Allow slicing, preserve cats
iscat = np.repeat(False, space.n_dims)
min_ = np.zeros(space.n_dims)
pts_ = np.zeros(points.shape)
for i, dim in enumerate(space.dimensions):
if isinstance(dim, Categorical):
iscat[i] = True
catmap = dict(zip(dim.categories, count()))
pts_[:, i] = [catmap[cat] for cat in points[:, i]]
min_[i] = catmap[minimum[i]]
else:
pts_[:, i] = points[:, i]
min_[i] = minimum[i]
return pts_, min_, iscat
def _evenly_sample(dim, n_points):
"""Return `n_points` evenly spaced points from a Dimension.
Parameters
----------
dim : `Dimension`
The Dimension to sample from. Can be categorical; evenly-spaced
category indices are chosen in order without replacement (result
may be smaller than `n_points`).
n_points : int
The number of points to sample from `dim`.
Returns
-------
xi : np.array
The sampled points in the Dimension. For Categorical
dimensions, returns the index of the value in
`dim.categories`.
xi_transformed : np.array
The transformed values of `xi`, for feeding to a model.
"""
cats = np.array(getattr(dim, 'categories', []), dtype=object)
if len(cats): # Sample categoricals while maintaining order
xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points),
dtype=int)
xi_transformed = dim.transform(cats[xi])
else:
bounds = dim.bounds
# XXX use linspace(*bounds, n_points) after python2 support ends
xi = np.linspace(bounds[0], bounds[1], n_points)
xi_transformed = dim.transform(xi)
return xi, xi_transformed
def _cat_format(dimension, x, _):
"""Categorical axis tick formatter function. Returns the name of category
`x` in `dimension`. Used with `matplotlib.ticker.FuncFormatter`."""
return str(dimension.categories[int(x)])
def _evaluate_min_params(result, params='result',
n_minimum_search=None,
random_state=None):
"""Returns the minimum based on `params`"""
x_vals = None
space = result.space
if isinstance(params, str):
if params == 'result':
# Using the best observed result
x_vals = result.x
elif params == 'expected_minimum':
            if result.space.is_partly_categorical:
                # Space contains one or more categorical dimensions
                raise ValueError('expected_minimum does not support any '
                                 'categorical values')
            # Do a gradient-based minimum search using scipy's own minimizer
            if n_minimum_search:
                # If a value for n_minimum_search has been passed
x_vals, _ = expected_minimum(
result,
n_random_starts=n_minimum_search,
random_state=random_state)
            else:  # Use a default of 20 random starting points
x_vals, _ = expected_minimum(result,
n_random_starts=20,
random_state=random_state)
elif params == 'expected_minimum_random':
# Do a minimum search by evaluating the function with
# n_samples sample values
if n_minimum_search is not None:
                # If a value for n_minimum_search has been passed
x_vals, _ = expected_minimum_random_sampling(
result,
n_random_starts=n_minimum_search,
random_state=random_state)
else:
                # Use a default of 10^n_parameters. Note this
                # becomes very slow for many parameters
n_minimum_search = 10 ** len(result.x)
x_vals, _ = expected_minimum_random_sampling(
result,
n_random_starts=n_minimum_search,
random_state=random_state)
        else:
            raise ValueError("Argument `params` must be a valid string "
                             "('result', 'expected_minimum' or "
                             "'expected_minimum_random')")
elif isinstance(params, list):
        assert len(params) == len(result.x), \
            'Argument `params` of type list must have same length as ' \
            'number of features'
# Using defined x_values
x_vals = params
else:
        raise ValueError('Argument `params` must '
                         'be a string or a list')
return x_vals