from __future__ import division
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import column_or_1d
class Identity(Transformer):
    """No-op transformer: data passes through unchanged in both directions."""

    def transform(self, X):
        """Return ``X`` exactly as given (same object, no copy)."""
        return X

    def inverse_transform(self, Xt):
        """Return ``Xt`` exactly as given (same object, no copy)."""
        return Xt
class StringEncoder(Transformer):
    """StringEncoder transform.

    The transform will cast everything to a string and the inverse
    transform will cast to the type defined in ``dtype``.

    Parameters
    ----------
    dtype : type, default=str
        Type used by the inverse transform. It is replaced by the type of
        the first fitted element when `fit` is called on non-empty data.
    """

    def __init__(self, dtype=str):
        super(StringEncoder, self).__init__()
        self.dtype = dtype

    def fit(self, X):
        """Fit a list or array of categories.

        All elements must be of the same type: only the first element is
        inspected to determine ``dtype``.

        Parameters
        ----------
        X : array-like, shape=(n_categories,)
            List of categories.

        Returns
        -------
        self : StringEncoder
            The fitted encoder, so that calls can be chained like the other
            ``fit`` methods in this module.
        """
        # `len` rather than truthiness: numpy arrays with more than one
        # element raise on `bool(X)`. Empty input keeps the current dtype.
        if len(X) > 0:
            self.dtype = type(X[0])
        return self
class LogN(Transformer):
    """Logarithm transform with a configurable base.

    Parameters
    ----------
    base : number
        Base of the logarithm; stored as ``_base``.
    """

    def __init__(self, base):
        self._base = base

    def transform(self, X):
        """Return the base-``_base`` logarithm of ``X`` as a float array."""
        values = np.asarray(X, dtype=float)
        # Change of base: log_b(x) = log10(x) / log10(b).
        numerator = np.log10(values)
        denominator = np.log10(self._base)
        return numerator / denominator

    def inverse_transform(self, Xt):
        """Return ``_base`` raised to the power of ``Xt`` (as floats)."""
        exponents = np.asarray(Xt, dtype=float)
        return self._base ** exponents
class CategoricalEncoder(Transformer):
    """One-hot encoder for categorical values, backed by ``LabelBinarizer``."""

    def __init__(self):
        """Create the encoder with a fresh, unfitted ``LabelBinarizer``."""
        self._lb = LabelBinarizer()

    def fit(self, X):
        """Learn the category <-> integer mappings and fit the binarizer.

        Parameters
        ----------
        X : array-like, shape=(n_categories,)
            List of categories. It is iterated more than once, so it must
            be re-iterable, and its elements are used as dictionary keys.

        Returns
        -------
        self : CategoricalEncoder
            The fitted encoder.
        """
        # Category -> position in X; a repeated category keeps its last
        # position because later assignments overwrite earlier ones.
        category_to_index = {}
        for position, category in enumerate(X):
            category_to_index[category] = position
        self.mapping_ = category_to_index

        index_to_category = {}
        for category, position in category_to_index.items():
            index_to_category[position] = category
        self.inverse_mapping_ = index_to_category

        # The binarizer is fitted on the integer codes, not the raw values.
        integer_codes = [category_to_index[category] for category in X]
        self._lb.fit(integer_codes)
        self.n_classes = len(self._lb.classes_)
        return self
class LabelEncoder(Transformer):
    """Encoder that maps categorical values to integer labels."""

    def __init__(self, X=None):
        """Optionally fit right away when categories ``X`` are supplied."""
        if X is None:
            return
        self.fit(X)

    def fit(self, X):
        """Build the category <-> integer label mappings.

        Parameters
        ----------
        X : array-like, shape=(n_categories,)
            List of categories.

        Returns
        -------
        self : LabelEncoder
            The fitted encoder.
        """
        categories = np.asarray(X)
        if categories.dtype == object:
            # Object arrays are labelled in the order given.
            self.mapping_ = {
                value: code for code, value in enumerate(categories)
            }
        else:
            # Any other dtype is labelled by position in `np.unique` output.
            self.mapping_ = {}
            for code, value in enumerate(np.unique(categories)):
                self.mapping_[value] = code
        self.inverse_mapping_ = {
            code: value for value, code in self.mapping_.items()
        }
        return self
class Normalize(Transformer):
    """Scales each dimension into the interval [0, 1].

    Parameters
    ----------
    low : float
        Lower bound.
    high : float
        Higher bound.
    is_int : bool, default=False
        Round and cast the return value of `inverse_transform` to integer. Set
        to `True` when applying this transform to integers.
    """

    def __init__(self, low, high, is_int=False):
        self.low = float(low)
        self.high = float(high)
        self.is_int = is_int
        # Tolerance used by the bound checks on non-rounded values.
        self._eps = 1e-8

    def transform(self, X):
        """Map ``X`` from ``[low, high]`` onto ``[0, 1]``.

        Parameters
        ----------
        X : array-like
            Values to scale. With ``is_int`` set they are rounded before
            being checked and scaled.

        Returns
        -------
        ndarray
            ``(X - low) / (high - low)``, or ``X * 0.`` when
            ``high == low``.

        Raises
        ------
        ValueError
            If any value lies outside ``[low, high]`` (after rounding when
            ``is_int`` is set, otherwise with a tolerance of ``_eps``).
        """
        X = np.asarray(X)
        if self.is_int:
            if np.any(np.round(X) > self.high):
                raise ValueError("All integer values should "
                                 "be less than %f" % self.high)
            if np.any(np.round(X) < self.low):
                raise ValueError("All integer values should "
                                 "be greater than %f" % self.low)
        else:
            if np.any(X > self.high + self._eps):
                raise ValueError("All values should "
                                 "be less than %f" % self.high)
            if np.any(X < self.low - self._eps):
                raise ValueError("All values should "
                                 "be greater than %f" % self.low)
        # Degenerate range: avoid dividing by zero.
        if (self.high - self.low) == 0.:
            return X * 0.
        if self.is_int:
            # Builtin `int` replaces the `np.int` alias removed from NumPy.
            return (np.round(X).astype(int) - self.low) / \
                (self.high - self.low)
        return (X - self.low) / (self.high - self.low)

    def inverse_transform(self, X):
        """Map ``X`` from ``[0, 1]`` back onto ``[low, high]``.

        Parameters
        ----------
        X : array-like
            Scaled values.

        Returns
        -------
        ndarray
            ``X * (high - low) + low``, rounded and cast to integer when
            ``is_int`` is set.

        Raises
        ------
        ValueError
            If any value lies outside ``[0, 1]`` by more than ``_eps``.
        """
        X = np.asarray(X)
        if np.any(X > 1.0 + self._eps):
            raise ValueError("All values should be less than 1.0")
        if np.any(X < 0.0 - self._eps):
            raise ValueError("All values should be greater than 0.0")
        X_orig = X * (self.high - self.low) + self.low
        if self.is_int:
            # Builtin `int` replaces the `np.int` alias removed from NumPy.
            return np.round(X_orig).astype(int)
        return X_orig
class Pipeline(Transformer):
    """A lightweight pipeline to chain transformers.

    Parameters
    ----------
    transformers : list
        A list of Transformer instances.

    Raises
    ------
    ValueError
        If any element of ``transformers`` is not a Transformer instance.
    """

    def __init__(self, transformers):
        self.transformers = list(transformers)
        for transformer in self.transformers:
            if not isinstance(transformer, Transformer):
                # Wrap in a 1-tuple: a bare tuple operand (e.g. a
                # ``(name, step)`` pair) would be unpacked by ``%`` and
                # raise TypeError instead of this ValueError.
                raise ValueError(
                    "Provided transformers should be a Transformer "
                    "instance. Got %s" % (transformer,)
                )

    def fit(self, X):
        """Fit every transformer on the same input ``X``.

        Each stage receives ``X`` as given, not the output of the
        previous stage.

        Returns
        -------
        self : Pipeline
            The fitted pipeline.
        """
        for transformer in self.transformers:
            transformer.fit(X)
        return self

    def transform(self, X):
        """Apply each transformer's ``transform`` in order, feeding the
        output of one stage into the next, and return the final result."""
        for transformer in self.transformers:
            X = transformer.transform(X)
        return X

    def inverse_transform(self, X):
        """Apply each transformer's ``inverse_transform`` in reverse order
        and return the final result."""
        for transformer in self.transformers[::-1]:
            X = transformer.inverse_transform(X)
        return X