Source code for skopt.space.space

import numbers
import numpy as np
import yaml

from scipy.stats.distributions import randint
from scipy.stats.distributions import rv_discrete
from scipy.stats.distributions import uniform

from sklearn.utils import check_random_state
from sklearn.utils.fixes import sp_version

from .transformers import CategoricalEncoder
from .transformers import StringEncoder
from .transformers import Normalize
from .transformers import Identity
from .transformers import LogN
from .transformers import Pipeline


# helper class to be able to print [1, ..., 4] instead of [1, '...', 4]
class _Ellipsis:
    def __repr__(self):
        return '...'


def check_dimension(dimension, transform=None):
    """Turn a provided dimension description into a dimension object.

    Checks that the provided dimension falls into one of the
    supported types. For a list of supported types, look at
    the documentation of ``dimension`` below.

    If ``dimension`` is already a ``Dimension`` instance, return it.

    Parameters
    ----------
    dimension : Dimension
        Search space Dimension.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple, where prior is
          "uniform" or "log-uniform" (for `Real` or `Integer` dimensions),
        - a `(lower_bound, upper_bound, "log-uniform", base)` tuple with an
          integer `base` (for `Real` or `Integer` dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

        A numeric description yields an `Integer` only when *both* bounds
        are integral; otherwise it yields a `Real`.

    transform : "identity", "normalize", "string", "onehot" optional
        - For `Categorical` dimensions, the following transformations are
          supported.

          - "onehot" (default) one-hot transformation of the original space.
          - "string" string transformation of the original space.
          - "identity" same as the original space.

        - For `Real` and `Integer` dimensions, the following transformations
          are supported.

          - "identity", (default) the transformed space is the same as the
            original space.
          - "normalize", the transformed space is scaled to be between 0
            and 1.

    Returns
    -------
    dimension : Dimension
        Dimension instance.

    Raises
    ------
    ValueError
        If ``dimension`` is not a list, tuple or ndarray, is empty, or is a
        pair whose elements are neither numbers, strings nor booleans.
    """
    if isinstance(dimension, Dimension):
        return dimension

    if not isinstance(dimension, (list, tuple, np.ndarray)):
        raise ValueError("Dimension has to be a list or tuple.")

    n_items = len(dimension)

    # A `Dimension` described by a single value is assumed to be
    # a `Categorical` dimension. This can be used in `BayesSearchCV`
    # to define subspaces that fix one value, e.g. to choose the
    # model type, see "sklearn-gridsearchcv-replacement.py"
    # for examples.
    if n_items == 1:
        return Categorical(dimension, transform=transform)

    if n_items == 2:
        if any(isinstance(d, (str, bool, np.bool_)) for d in dimension):
            return Categorical(dimension, transform=transform)
        if all(isinstance(d, numbers.Integral) for d in dimension):
            return Integer(*dimension, transform=transform)
        if any(isinstance(d, numbers.Real) for d in dimension):
            return Real(*dimension, transform=transform)
        raise ValueError("Invalid dimension {}. Read the documentation for"
                         " supported types.".format(dimension))

    if n_items in (3, 4):
        bounds = dimension[:2]
        # Both bounds must be numbers for a numeric dimension; the numeric
        # test runs first so that arbitrary category objects in position 2
        # are never compared against the prior names.
        numeric = all(isinstance(b, numbers.Real) for b in bounds)
        if n_items == 3:
            numeric = numeric and dimension[2] in ["uniform", "log-uniform"]
        else:
            numeric = (numeric and dimension[2] == "log-uniform" and
                       isinstance(dimension[3], numbers.Integral))
        if numeric:
            # Integer only when *both* bounds are integral, mirroring the
            # two-element case above; a mixed int/float pair is Real.
            if all(isinstance(b, numbers.Integral) for b in bounds):
                return Integer(*dimension, transform=transform)
            return Real(*dimension, transform=transform)
        return Categorical(dimension, transform=transform)

    if n_items > 3:
        return Categorical(dimension, transform=transform)

    # Only reachable for an empty description.
    raise ValueError("Invalid dimension {}. Read the documentation for "
                     "supported types.".format(dimension))
class Dimension(object):
    """Base class for search space dimensions.

    The methods below read ``self._rvs`` (an object with an
    ``rvs(size=..., random_state=...)`` method) and ``self.transformer``
    (an object with ``transform`` / ``inverse_transform``); neither is set
    by this base class.
    """

    prior = None

    def rvs(self, n_samples=1, random_state=None):
        """Draw random samples.

        Samples are drawn from ``self._rvs`` and then passed through
        ``inverse_transform`` before being returned.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to be drawn.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.
        """
        generator = check_random_state(random_state)
        warped = self._rvs.rvs(size=n_samples, random_state=generator)
        return self.inverse_transform(warped)

    def transform(self, X):
        """Transform samples from the original space to a warped space."""
        warped = self.transformer.transform(X)
        return warped

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.
        """
        original = self.transformer.inverse_transform(Xt)
        return original

    @property
    def size(self):
        """Width of the dimension in the original space (always 1 here)."""
        return 1

    @property
    def transformed_size(self):
        """Width of the dimension in the warped space (1 by default)."""
        return 1

    @property
    def bounds(self):
        """Bounds in the original space; not implemented in the base class."""
        raise NotImplementedError

    @property
    def transformed_bounds(self):
        """Bounds in the warped space; not implemented in the base class."""
        raise NotImplementedError

    @property
    def name(self):
        """Name of the dimension (a string or None)."""
        return self._name

    @name.setter
    def name(self, value):
        # Guard clause: anything other than a string or None is rejected.
        if value is not None and not isinstance(value, str):
            raise ValueError(
                "Dimension's name must be either string or None.")
        self._name = value
def _uniform_inclusive(loc=0.0, scale=1.0): # like scipy.stats.distributions but inclusive of `high` # XXX scale + 1. might not actually be a float after scale if # XXX scale is very large. return uniform(loc=loc, scale=np.nextafter(scale, scale + 1.))
class Real(Dimension):
    """Search space dimension that can take on any real value.

    Parameters
    ----------
    low : float
        Lower bound (inclusive).

    high : float
        Upper bound (inclusive).

    prior : "uniform" or "log-uniform", default="uniform"
        Distribution to use when sampling random points for this dimension.

        - If `"uniform"`, points are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, points are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    base : int
        The logarithmic base to use for a log-uniform prior.
        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with the dimension, e.g., "learning rate".

    dtype : str or dtype, default=float
        float type which will be used in inverse_transform,
        can be float, np.float16, np.float32 or np.float64 (or the
        corresponding strings). When set to float, `inverse_transform`
        returns plain Python values (via ``tolist``) instead of numpy ones.

    Raises
    ------
    ValueError
        If ``high <= low``, or if ``dtype`` or ``transform`` is not one of
        the supported values.
    """

    # ``float`` replaces the former ``np.float`` default: that name was only
    # an alias of the builtin and no longer exists in current NumPy.
    def __init__(self, low, high, prior="uniform", base=10, transform=None,
                 name=None, dtype=float):
        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        if isinstance(self.dtype, str) and self.dtype\
                not in ['float', 'float16', 'float32', 'float64']:
            raise ValueError("dtype must be 'float', 'float16', 'float32'"
                             "or 'float64'"
                             " got {}".format(self.dtype))
        elif isinstance(self.dtype, type) and self.dtype\
                not in [float, np.float16, np.float32, np.float64]:
            raise ValueError("dtype must be float, np.float"
                             " got {}".format(self.dtype))

        if transform is None:
            transform = "identity"
        self.transform_ = transform
        if self.transform_ not in ["normalize", "identity"]:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))

        # Define _rvs and transformer spaces.
        # XXX: The _rvs is for sampling in the transformed space.
        # The rvs on Dimension calls inverse_transform on the points sampled
        # using _rvs
        if self.transform_ == "normalize":
            # set upper bound to next float after 1. to make the numbers
            # inclusive of upper edge
            self._rvs = _uniform_inclusive(0., 1.)
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(low, high)])
            else:
                self.transformer = Pipeline(
                    [LogN(self.base),
                     Normalize(np.log10(low) / self.log_base,
                               np.log10(high) / self.log_base)]
                )
        else:
            if self.prior == "uniform":
                self._rvs = _uniform_inclusive(self.low, self.high - self.low)
                self.transformer = Identity()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base -
                    np.log10(self.low) / self.log_base)
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        return (type(self) is type(other) and
                np.allclose([self.low], [other.low]) and
                np.allclose([self.high], [other.high]) and
                self.prior == other.prior and
                self.transform_ == other.transform_)

    def __repr__(self):
        return "Real(low={}, high={}, prior='{}', transform='{}')".format(
            self.low, self.high, self.prior, self.transform_)

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.

        The result is clipped to ``[low, high]`` and cast to ``dtype``.
        """
        inv_transform = super(Real, self).inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        inv_transform = np.clip(inv_transform,
                                self.low, self.high).astype(self.dtype)
        if self.dtype == float or self.dtype == 'float':
            # necessary, otherwise the type is converted to a numpy type
            # (the fallback returns the value itself; it previously referred
            # to an undefined name)
            return getattr(inv_transform, "tolist", lambda: inv_transform)()
        else:
            return inv_transform

    @property
    def bounds(self):
        return (self.low, self.high)

    def __contains__(self, point):
        """Return True if ``point`` (or every element of it) is in bounds."""
        if isinstance(point, (list, np.ndarray)):
            # A chained comparison on a multi-element array has no single
            # truth value, so reduce the element-wise result explicitly.
            values = np.asarray(point)
            return bool(np.all((self.low <= values) & (values <= self.high)))
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        if self.transform_ == "normalize":
            return 0.0, 1.0
        else:
            if self.prior == "uniform":
                return self.low, self.high
            else:
                # The transformer is LogN(base), i.e. log10(x) / log10(base),
                # so the bounds must be expressed in that same base.
                return (np.log10(self.low) / self.log_base,
                        np.log10(self.high) / self.log_base)

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : float
            First point.

        b : float
            Second point.

        Raises
        ------
        RuntimeError
            If either point lies outside the dimension's bounds.
        """
        if not (a in self and b in self):
            raise RuntimeError("Can only compute distance for values within "
                               "the space, not %s and %s." % (a, b))
        return abs(a - b)
class Integer(Dimension):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    low : int
        Lower bound (inclusive).

    high : int
        Upper bound (inclusive).

    prior : "uniform" or "log-uniform", default="uniform"
        Distribution to use when sampling random integers for
        this dimension.

        - If `"uniform"`, integers are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, integers are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    base : int
        The logarithmic base to use for a log-uniform prior.
        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with dimension, e.g., "number of trees".

    dtype : str or dtype, default=np.int64
        integer type which will be used in inverse_transform,
        can be int, np.int16, np.uint32, np.int32, np.int64 (default).
        When set to int, `inverse_transform` returns a list instead of
        a numpy array

    Raises
    ------
    ValueError
        If ``high <= low``, or if ``dtype`` or ``transform`` is not one of
        the supported values.
    """

    def __init__(self, low, high, prior="uniform", base=10, transform=None,
                 name=None, dtype=np.int64):
        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        if isinstance(self.dtype, str) and self.dtype\
                not in ['int', 'int8', 'int16', 'int32', 'int64',
                        'uint8', 'uint16', 'uint32', 'uint64']:
            raise ValueError("dtype must be 'int', 'int8', 'int16',"
                             "'int32', 'int64', 'uint8',"
                             "'uint16', 'uint32', or"
                             "'uint64', but got {}".format(self.dtype))
        elif isinstance(self.dtype, type) and self.dtype\
                not in [int, np.int8, np.int16, np.int32, np.int64,
                        np.uint8, np.uint16, np.uint32, np.uint64]:
            raise ValueError("dtype must be 'int', 'np.int8', 'np.int16',"
                             "'np.int32', 'np.int64', 'np.uint8',"
                             "'np.uint16', 'np.uint32', or"
                             "'np.uint64', but got {}".format(self.dtype))

        if transform is None:
            transform = "identity"
        self.transform_ = transform
        if transform not in ["normalize", "identity"]:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))

        # _rvs samples in the transformed space; Dimension.rvs maps the
        # samples back through inverse_transform.
        if self.transform_ == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(low, high, is_int=True)])
            else:
                self.transformer = Pipeline(
                    [LogN(self.base),
                     Normalize(np.log10(low) / self.log_base,
                               np.log10(high) / self.log_base)]
                )
        else:
            if self.prior == "uniform":
                # randint excludes its upper end, hence the + 1
                self._rvs = randint(self.low, self.high + 1)
                self.transformer = Identity()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base -
                    np.log10(self.low) / self.log_base)
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        return (type(self) is type(other) and
                np.allclose([self.low], [other.low]) and
                np.allclose([self.high], [other.high]))

    def __repr__(self):
        return "Integer(low={}, high={}, prior='{}', transform='{}')".format(
            self.low, self.high, self.prior, self.transform_)

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.

        Values are rounded to the nearest integer and cast to ``dtype``.
        """
        # The concatenation of all transformed dimensions makes Xt to be
        # of type float, hence the required cast back to int.
        inv_transform = super(Integer, self).inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        rounded = np.round(inv_transform).astype(self.dtype)
        if self.dtype == int or self.dtype == 'int':
            # necessary, otherwise the type is converted to a numpy type
            # (the fallback returns the value itself; it previously referred
            # to an undefined name)
            return getattr(rounded, "tolist", lambda: rounded)()
        else:
            return rounded

    @property
    def bounds(self):
        return (self.low, self.high)

    def __contains__(self, point):
        """Return True if ``point`` (or every element of it) is in bounds."""
        if isinstance(point, (list, np.ndarray)):
            # A chained comparison on a multi-element array has no single
            # truth value, so reduce the element-wise result explicitly.
            values = np.asarray(point)
            return bool(np.all((self.low <= values) & (values <= self.high)))
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        if self.transform_ == "normalize":
            return 0, 1
        if self.prior == "uniform":
            return (self.low, self.high)
        # Identity transform with a log-uniform prior uses LogN(base) as the
        # transformer, so the warped space is the log space and its bounds
        # are the logs of the original bounds.
        return (np.log10(self.low) / self.log_base,
                np.log10(self.high) / self.log_base)

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : int
            First point.

        b : int
            Second point.

        Raises
        ------
        RuntimeError
            If either point lies outside the dimension's bounds.
        """
        if not (a in self and b in self):
            raise RuntimeError("Can only compute distance for values within "
                               "the space, not %s and %s." % (a, b))
        return abs(a - b)
class Categorical(Dimension):
    """Search space dimension that can take on categorical values.

    Parameters
    ----------
    categories : list, shape=(n_categories,)
        Sequence of possible categories.

    prior : list, shape=(categories,), default=None
        Prior probabilities for each category. By default all categories
        are equally likely.

    transform : "onehot", "string", "identity", default="onehot"
        - "identity", the transformed space is the same as the original
          space.
        - "string",  the transformed space is a string encoded
          representation of the original space.
        - "onehot", the transformed space is a one-hot encoded
          representation of the original space.

    name : str or None
        Name associated with dimension, e.g., "colors".

    Raises
    ------
    ValueError
        If ``transform`` is not one of the supported values.
    """

    def __init__(self, categories, prior=None, transform=None, name=None):
        self.categories = tuple(categories)

        self.name = name

        if transform is None:
            transform = "onehot"
        self.transform_ = transform
        if transform not in ["identity", "onehot", "string"]:
            raise ValueError("Expected transform to be 'identity', 'string' or"
                             "'onehot' got {}".format(transform))
        if transform == "onehot":
            self.transformer = CategoricalEncoder()
        elif transform == "string":
            self.transformer = StringEncoder()
        else:
            self.transformer = Identity()
        # Every supported transformer is fitted on the categories.
        self.transformer.fit(self.categories)

        self.prior = prior

        if prior is None:
            self.prior_ = np.tile(1. / len(self.categories),
                                  len(self.categories))
        else:
            self.prior_ = prior

        # XXX check that sum(prior) == 1
        # _rvs draws category *indices*; rvs() maps them back to categories.
        self._rvs = rv_discrete(
            values=(range(len(self.categories)), self.prior_)
        )

    def __eq__(self, other):
        return (type(self) is type(other) and
                self.categories == other.categories and
                np.allclose(self.prior_, other.prior_))

    def __repr__(self):
        if len(self.categories) > 7:
            cats = self.categories[:3] + (_Ellipsis(),) + self.categories[-3:]
        else:
            cats = self.categories

        if self.prior is not None and len(self.prior) > 7:
            # Normalise the slices to lists so that tuple or ndarray priors
            # can be abbreviated too (``tuple + list`` would raise).
            prior = (list(self.prior[:3]) + [_Ellipsis()] +
                     list(self.prior[-3:]))
        else:
            prior = self.prior

        return "Categorical(categories={}, prior={})".format(cats, prior)

    def rvs(self, n_samples=None, random_state=None):
        """Draw random categories.

        Parameters
        ----------
        n_samples : int or None, default=None
            Passed to the underlying distribution as ``size``.

        random_state : int, RandomState instance, or None (default)
            Passed through to the underlying distribution.

        Returns
        -------
        A single category when the underlying draw is a scalar integer,
        otherwise a list of categories.
        """
        choices = self._rvs.rvs(size=n_samples, random_state=random_state)

        if isinstance(choices, numbers.Integral):
            return self.categories[choices]
        else:
            return [self.categories[c] for c in choices]

    @property
    def transformed_size(self):
        if self.transform_ == "onehot":
            size = len(self.categories)
            # when len(categories) == 2, CategoricalEncoder outputs a
            # single value
            return size if size != 2 else 1
        return 1

    @property
    def bounds(self):
        return self.categories

    def __contains__(self, point):
        return point in self.categories

    @property
    def transformed_bounds(self):
        if self.transformed_size == 1:
            return (0.0, 1.0)
        else:
            return [(0.0, 1.0) for i in range(self.transformed_size)]

    def distance(self, a, b):
        """Compute distance between category `a` and `b`.

        As categories have no order the distance between two points is one
        if a != b and zero otherwise.

        Parameters
        ----------
        a : category
            First category.

        b : category
            Second category.

        Raises
        ------
        RuntimeError
            If either value is not one of the dimension's categories.
        """
        if not (a in self and b in self):
            raise RuntimeError("Can only compute distance for values within"
                               " the space, not {} and {}.".format(a, b))
        return 1 if a != b else 0
class Space(object):
    """Initialize a search space from given specifications.

    Parameters
    ----------
    dimensions : list, shape=(n_dims,)
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

        .. note::
            The upper and lower bounds are inclusive for `Integer`
            dimensions.
    """

    def __init__(self, dimensions):
        self.dimensions = [check_dimension(dim) for dim in dimensions]

    def __eq__(self, other):
        if not isinstance(other, Space):
            return NotImplemented
        # zip() alone would stop at the shorter list and report two spaces
        # of different dimensionality as equal when one is a prefix of the
        # other, so the lengths are compared first.
        if len(self.dimensions) != len(other.dimensions):
            return False
        return all(a == b for a, b in zip(self.dimensions, other.dimensions))

    def __repr__(self):
        if len(self.dimensions) > 31:
            dims = self.dimensions[:15] + [_Ellipsis()] + self.dimensions[-15:]
        else:
            dims = self.dimensions
        return "Space([{}])".format(',\n '.join(map(str, dims)))

    def __iter__(self):
        return iter(self.dimensions)

    @property
    def is_real(self):
        """
        Returns true if all dimensions are Real
        """
        return all([isinstance(dim, Real) for dim in self.dimensions])

    @classmethod
    def from_yaml(cls, yml_path, namespace=None):
        """Create Space from yaml configuration file

        Parameters
        ----------
        yml_path : str
            Full path to yaml configuration file, example YaML below:
            Space:
              - Integer:
                  low: -5
                  high: 5
              - Categorical:
                  categories:
                  - a
                  - b
              - Real:
                  low: 1.0
                  high: 5.0
                  prior: log-uniform
        namespace : str, default=None
           Namespace within configuration file to use, will use first
           namespace if not provided

        Returns
        -------
        space : Space
           Instantiated Space object

        Raises
        ------
        TypeError
            If the top-level YAML value is neither a list nor a dictionary.
        """
        with open(yml_path, 'rb') as f:
            config = yaml.safe_load(f)

        dimension_classes = {'real': Real,
                             'integer': Integer,
                             'categorical': Categorical}

        # Extract space options for configuration file
        if isinstance(config, dict):
            if namespace is None:
                options = next(iter(config.values()))
            else:
                options = config[namespace]
        elif isinstance(config, list):
            options = config
        else:
            raise TypeError('YaML does not specify a list or dictionary')

        # Populate list with Dimension objects
        dimensions = []
        for option in options:
            key = next(iter(option.keys()))
            # Make configuration case insensitive
            dimension_class = key.lower()
            values = {k.lower(): v for k, v in option[key].items()}
            # Entries whose key is not a known dimension type are skipped.
            if dimension_class in dimension_classes:
                # Instantiate Dimension subclass and add it to the list
                dimension = dimension_classes[dimension_class](**values)
                dimensions.append(dimension)

        space = cls(dimensions=dimensions)

        return space

    def rvs(self, n_samples=1, random_state=None):
        """Draw random samples.

        The samples are in the original space. They need to be transformed
        before being passed to a model or minimizer by `space.transform()`.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to be drawn from the space.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.

        Returns
        -------
        points : list of lists, shape=(n_points, n_dims)
           Points sampled from the space.
        """
        rng = check_random_state(random_state)

        # Draw one column of samples per dimension. The shared generator is
        # always forwarded: the dimensions themselves pass ``random_state``
        # to scipy unconditionally, so the former scipy-version switch
        # (which compared ``sp_version`` against a tuple) served no purpose.
        columns = [dim.rvs(n_samples=n_samples, random_state=rng)
                   for dim in self.dimensions]

        # Transpose columns into one row per sample
        return [[column[i] for column in columns] for i in range(n_samples)]

    def transform(self, X):
        """Transform samples from the original space into a warped space.

        Note: this transformation is expected to be used to project samples
              into a suitable space for numerical optimization.

        Parameters
        ----------
        X : list of lists, shape=(n_samples, n_dims)
            The samples to transform.

        Returns
        -------
        Xt : array of floats, shape=(n_samples, transformed_n_dims)
            The transformed samples.
        """
        n_samples = len(X)

        # Pack by dimension
        columns = [[X[i][j] for i in range(n_samples)]
                   for j in range(self.n_dims)]

        # Transform each column with its own dimension
        columns = [dim.transform(column)
                   for dim, column in zip(self.dimensions, columns)]

        # Repack as an array; a dimension may expand to several columns
        Xt = np.hstack([np.asarray(c).reshape((n_samples, -1))
                        for c in columns])

        return Xt

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back to the
           original space.

        Parameters
        ----------
        Xt : array of floats, shape=(n_samples, transformed_n_dims)
            The samples to inverse transform.

        Returns
        -------
        X : list of lists, shape=(n_samples, n_dims)
            The original samples.
        """
        # Inverse transform, walking through the columns each dimension
        # occupies in the warped array
        columns = []
        start = 0

        for dim in self.dimensions:
            offset = dim.transformed_size

            if offset == 1:
                columns.append(dim.inverse_transform(Xt[:, start]))
            else:
                columns.append(
                    dim.inverse_transform(Xt[:, start:start + offset]))

            start += offset

        # Transpose columns into one row per sample
        return [[column[i] for column in columns] for i in range(len(Xt))]

    @property
    def n_dims(self):
        """The dimensionality of the original space."""
        return len(self.dimensions)

    @property
    def transformed_n_dims(self):
        """The dimensionality of the warped space."""
        return sum([dim.transformed_size for dim in self.dimensions])

    @property
    def bounds(self):
        """The dimension bounds, in the original space."""
        b = []

        for dim in self.dimensions:
            if dim.size == 1:
                b.append(dim.bounds)
            else:
                b.extend(dim.bounds)

        return b

    def __contains__(self, point):
        """Check that `point` is within the bounds of the space."""
        for component, dim in zip(point, self.dimensions):
            if component not in dim:
                return False
        return True

    @property
    def transformed_bounds(self):
        """The dimension bounds, in the warped space."""
        b = []

        for dim in self.dimensions:
            if dim.transformed_size == 1:
                b.append(dim.transformed_bounds)
            else:
                b.extend(dim.transformed_bounds)

        return b

    @property
    def is_categorical(self):
        """Space contains exclusively categorical dimensions"""
        return all([isinstance(dim, Categorical) for dim in self.dimensions])

    @property
    def is_partly_categorical(self):
        """Space contains any categorical dimensions"""
        return any([isinstance(dim, Categorical) for dim in self.dimensions])

    def distance(self, point_a, point_b):
        """Compute distance between two points in this space.

        The result is the sum of the per-dimension distances.

        Parameters
        ----------
        point_a : array
            First point.

        point_b : array
            Second point.
        """
        distance = 0.
        for a, b, dim in zip(point_a, point_b, self.dimensions):
            distance += dim.distance(a, b)

        return distance