Source code for skopt.learning.gbrt
import numpy as np
from sklearn.base import clone
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.utils import check_random_state
from joblib import Parallel, delayed
def _parallel_fit(regressor, X, y):
return regressor.fit(X, y)
[docs]class GradientBoostingQuantileRegressor(BaseEstimator, RegressorMixin):
"""Predict several quantiles with one estimator.
This is a wrapper around `GradientBoostingRegressor`'s quantile
regression that allows you to predict several `quantiles` in
one go.
Parameters
----------
quantiles : array-like
Quantiles to predict. By default the 16, 50 and 84%
quantiles are predicted.
base_estimator : GradientBoostingRegressor instance or None (default)
Quantile regressor used to make predictions. Only instances
of `GradientBoostingRegressor` are supported. Use this to change
the hyper-parameters of the estimator.
n_jobs : int, default=1
The number of jobs to run in parallel for `fit`.
If -1, then the number of jobs is set to the number of cores.
random_state : int, RandomState instance, or None (default)
Set random state to something other than None for reproducible
results.
"""
[docs] def __init__(self, quantiles=[0.16, 0.5, 0.84], base_estimator=None,
n_jobs=1, random_state=None):
self.quantiles = quantiles
self.random_state = random_state
self.base_estimator = base_estimator
self.n_jobs = n_jobs
[docs] def fit(self, X, y):
"""Fit one regressor for each quantile.
Parameters
----------
X : array-like, shape=(n_samples, n_features)
Training vectors, where `n_samples` is the number of samples
and `n_features` is the number of features.
y : array-like, shape=(n_samples,)
Target values (real numbers in regression)
"""
rng = check_random_state(self.random_state)
if self.base_estimator is None:
base_estimator = GradientBoostingRegressor(loss='quantile')
else:
base_estimator = self.base_estimator
if not isinstance(base_estimator, GradientBoostingRegressor):
raise ValueError('base_estimator has to be of type'
' GradientBoostingRegressor.')
if not base_estimator.loss == 'quantile':
raise ValueError('base_estimator has to use quantile'
' loss not %s' % base_estimator.loss)
# The predictions for different quantiles should be sorted.
# Therefore each of the regressors need the same seed.
base_estimator.set_params(random_state=rng)
regressors = []
for q in self.quantiles:
regressor = clone(base_estimator)
regressor.set_params(alpha=q)
regressors.append(regressor)
self.regressors_ = Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(_parallel_fit)(regressor, X, y)
for regressor in regressors)
return self
[docs] def predict(self, X, return_std=False, return_quantiles=False):
"""Predict.
Predict `X` at every quantile if `return_std` is set to False.
If `return_std` is set to True, then return the mean
and the predicted standard deviation, which is approximated as
the (0.84th quantile - 0.16th quantile) divided by 2.0
Parameters
----------
X : array-like, shape=(n_samples, n_features)
where `n_samples` is the number of samples
and `n_features` is the number of features.
"""
predicted_quantiles = np.asarray(
[rgr.predict(X) for rgr in self.regressors_])
if return_quantiles:
return predicted_quantiles.T
elif return_std:
std_quantiles = [0.16, 0.5, 0.84]
is_present_mask = np.in1d(std_quantiles, self.quantiles)
if not np.all(is_present_mask):
raise ValueError(
"return_std works only if the quantiles during "
"instantiation include 0.16, 0.5 and 0.84")
low = self.regressors_[self.quantiles.index(0.16)].predict(X)
high = self.regressors_[self.quantiles.index(0.84)].predict(X)
mean = self.regressors_[self.quantiles.index(0.5)].predict(X)
return mean, ((high - low) / 2.0)
# return the mean
return self.regressors_[self.quantiles.index(0.5)].predict(X)