from __future__ import division
import numpy as np
from sklearn.preprocessing import LabelBinarizer
class Identity(Transformer):
    """Pass-through transform.

    Both directions hand the input back unchanged.
    """

    def transform(self, X):
        """Return `X` exactly as given (same object, no copy)."""
        return X

    def inverse_transform(self, Xt):
        """Return `Xt` exactly as given (same object, no copy)."""
        return Xt
class StringEncoder(Transformer):
    """StringEncoder transform.

    The transform casts every element to a string; the inverse transform
    casts every element back by calling `dtype` on it.

    Parameters
    ----------
    dtype : callable, default=str
        Type (or any one-argument callable) applied to each element by
        `inverse_transform`. It is overwritten by `fit` whenever `fit`
        receives a non-empty input.

    Notes
    -----
    The round trip is only exact for types whose constructor can parse
    their own `str` output. For example ``bool("False")`` is ``True``, so
    boolean categories do not survive `transform` followed by
    `inverse_transform`.
    """

    def __init__(self, dtype=str):
        super(StringEncoder, self).__init__()
        self.dtype = dtype

    def fit(self, X):
        """Fit a list or array of categories. All elements must be from the
        same type.

        Parameters
        ----------
        X : array-like, shape=(n_categories,)
            List of categories.

        Returns
        -------
        self : StringEncoder
            The fitted encoder, so that calls can be chained like the other
            transformers' `fit` methods.
        """
        # Only the first element is inspected; an empty input leaves the
        # dtype passed to the constructor untouched.
        if len(X) > 0:
            self.dtype = type(X[0])
        return self

    def transform(self, X):
        """Transform an array of categories to a string encoded representation.

        Parameters
        ----------
        X : array-like, shape=(n_samples,)
            List of categories.

        Returns
        -------
        Xt : list, length n_samples
            The string encoded categories.
        """
        return [str(x) for x in X]

    def inverse_transform(self, Xt):
        """Inverse transform string encoded categories back to their original
        representation.

        Parameters
        ----------
        Xt : array-like, shape=(n_samples,)
            String encoded categories.

        Returns
        -------
        X : list, length n_samples
            Each element of `Xt` passed through `self.dtype`.
        """
        return [self.dtype(x) for x in Xt]
class LogN(Transformer):
    """Base N logarithm transform.

    Parameters
    ----------
    base : float
        Base of the logarithm.
    """

    def __init__(self, base):
        self._base = base

    def transform(self, X):
        """Return the base-`base` logarithm of `X` as a float array.

        Parameters
        ----------
        X : array-like
            Values to transform; converted to a float NumPy array.

        Returns
        -------
        Xt : ndarray
            ``log10(X) / log10(base)``, computed element-wise.
        """
        # The builtin `float` replaces the `np.float` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        return np.log10(np.asarray(X, dtype=float)) / np.log10(self._base)

    def inverse_transform(self, Xt):
        """Return ``base ** Xt`` element-wise as a float array.

        Parameters
        ----------
        Xt : array-like
            Log-space values; converted to a float NumPy array.

        Returns
        -------
        X : ndarray
            The values raised back out of log space.
        """
        return self._base ** np.asarray(Xt, dtype=float)
class CategoricalEncoder(Transformer):
    """OneHotEncoder that can handle categorical variables."""

    def __init__(self):
        """Create the encoder, backed by an unfitted `LabelBinarizer`."""
        self._lb = LabelBinarizer()

    def fit(self, X):
        """Fit a list or array of categories.

        Each category is mapped to its position in `X` (stored in
        `mapping_`, with the reverse lookup in `inverse_mapping_`), and the
        internal `LabelBinarizer` is fitted on those integer positions.
        The number of classes it found is stored in `n_classes`.

        Parameters
        ----------
        X : array-like, shape=(n_categories,)
            List of categories.

        Returns
        -------
        self : CategoricalEncoder
            The fitted encoder.
        """
        index_of = {}
        for position, category in enumerate(X):
            # A repeated category keeps the position of its last occurrence.
            index_of[category] = position
        self.mapping_ = index_of
        self.inverse_mapping_ = dict(
            (position, category) for category, position in index_of.items()
        )

        encoded = [index_of[category] for category in X]
        self._lb.fit(encoded)
        self.n_classes = len(self._lb.classes_)
        return self
class Normalize(Transformer):
    """
    Scales each dimension into the interval [0, 1].

    Parameters
    ----------
    low : float
        Lower bound.
    high : float
        Higher bound.
    is_int : bool, default=False
        Round and cast the return value of `inverse_transform` to integer. Set
        to `True` when applying this transform to integers.
    """

    # Slack allowed at both bounds to absorb floating point round-off.
    _eps = 1e-8

    def __init__(self, low, high, is_int=False):
        self.low = float(low)
        self.high = float(high)
        self.is_int = is_int

    def transform(self, X):
        """Map values from ``[low, high]`` onto ``[0, 1]``.

        Parameters
        ----------
        X : array-like
            Values in the original space. When `is_int` is set they are
            rounded to the nearest integer before scaling.

        Returns
        -------
        Xt : ndarray
            ``(X - low) / (high - low)``.

        Raises
        ------
        ValueError
            If any value lies outside ``[low, high]`` (after rounding when
            `is_int` is set, otherwise with a tolerance of 1e-8).
        """
        X = np.asarray(X)
        if self.is_int:
            rounded = np.round(X)
            if np.any(rounded > self.high):
                raise ValueError("All integer values should "
                                 "be less than %f" % self.high)
            if np.any(rounded < self.low):
                raise ValueError("All integer values should "
                                 "be greater than %f" % self.low)
            # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
            return (rounded.astype(int) - self.low) / (self.high - self.low)

        if np.any(X > self.high + self._eps):
            raise ValueError("All values should "
                             "be less than %f" % self.high)
        if np.any(X < self.low - self._eps):
            raise ValueError("All values should "
                             "be greater than %f" % self.low)
        return (X - self.low) / (self.high - self.low)

    def inverse_transform(self, X):
        """Map values from ``[0, 1]`` back onto ``[low, high]``.

        Parameters
        ----------
        X : array-like
            Normalized values.

        Returns
        -------
        X_orig : ndarray
            ``X * (high - low) + low``; rounded and cast to integer when
            `is_int` is set.

        Raises
        ------
        ValueError
            If any value lies outside ``[0, 1]`` by more than 1e-8.
        """
        X = np.asarray(X)
        # `transform` accepts inputs up to 1e-8 outside the bounds, so its
        # output can fall marginally outside [0, 1]; allow the same slack
        # here so that such values can be mapped back.
        if np.any(X > 1.0 + self._eps):
            raise ValueError("All values should be less than 1.0")
        if np.any(X < 0.0 - self._eps):
            raise ValueError("All values should be greater than 0.0")
        X_orig = X * (self.high - self.low) + self.low
        if self.is_int:
            # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
            return np.round(X_orig).astype(int)
        return X_orig
class Pipeline(Transformer):
    """
    A lightweight pipeline to chain transformers.

    Parameters
    ----------
    transformers : list
        A list of Transformer instances.
    """

    def __init__(self, transformers):
        """Store the transformers after checking each one's type.

        Raises
        ------
        ValueError
            If any element is not a `Transformer` instance.
        """
        self.transformers = list(transformers)
        for transformer in self.transformers:
            if not isinstance(transformer, Transformer):
                # Wrap the operand in a 1-tuple: a bare tuple element (e.g.
                # a ``(name, transformer)`` pair) would otherwise be
                # unpacked by `%` and raise TypeError instead of this
                # ValueError.
                raise ValueError(
                    "Provided transformers should be a Transformer "
                    "instance. Got %s" % (transformer,)
                )

    def fit(self, X):
        """Fit every transformer on `X` and return the pipeline.

        Each transformer is fitted on the same, untransformed `X`; the
        output of one step is not fed into the next during fitting.
        """
        for transformer in self.transformers:
            transformer.fit(X)
        return self

    def transform(self, X):
        """Apply each transformer's `transform` in order, feeding the
        result of one step into the next, and return the final result."""
        for transformer in self.transformers:
            X = transformer.transform(X)
        return X

    def inverse_transform(self, X):
        """Apply each transformer's `inverse_transform` in reverse order,
        feeding the result of one step into the next, and return the final
        result."""
        for transformer in self.transformers[::-1]:
            X = transformer.inverse_transform(X)
        return X