Indeed, there is no option to define metric_params here as there is in other cases. Other pairwise-distance-based estimators provide a metric_params parameter for passing additional arguments to the distance function, documented like this:
metric_params : dict, optional (default = None)
Additional keyword arguments for the metric function.
This answer shows how to use that parameter.
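For instance, NearestNeighbors already exposes metric_params, and the extra keyword arguments reach the distance function directly. This is only a minimal, self-contained sketch with made-up data (X_demo, nn are illustrative names, not from the question); the 'V' keyword is the feature covariance matrix that sklearn's Mahalanobis distance metric accepts:

import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 5)  # illustrative data: 100 samples, 5 features
nn = NearestNeighbors(n_neighbors=3, metric='mahalanobis',
                      metric_params={'V': np.cov(X_demo, rowvar=False)})
nn.fit(X_demo)
dist, ind = nn.kneighbors(X_demo[:5])  # metric_params flowed through to the metric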
TSNE, however, has no way to pass those additional parameters. So for now you need to extend the class, overriding __init__() to accept the parameters and _fit() to actually use them. We can do this:
from time import time
import numpy as np
import scipy.sparse as sp
from sklearn.manifold import TSNE
from sklearn.externals.six import string_types
from sklearn.utils import check_array, check_random_state
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.manifold.t_sne import _joint_probabilities, _joint_probabilities_nn
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
class MyTSNE(TSNE):
    def __init__(self, n_components=2, perplexity=30.0,
                 early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,
                 n_iter_without_progress=300, min_grad_norm=1e-7,
                 metric="euclidean", metric_params=None,  #<=ADDED
                 init="random", verbose=0,
                 random_state=None, method='barnes_hut', angle=0.5):
        self.n_components = n_components
        self.perplexity = perplexity
        self.early_exaggeration = early_exaggeration
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.n_iter_without_progress = n_iter_without_progress
        self.min_grad_norm = min_grad_norm
        self.metric = metric
        self.metric_params = metric_params  #<=ADDED
        self.init = init
        self.verbose = verbose
        self.random_state = random_state
        self.method = method
        self.angle = angle

    def _fit(self, X, skip_num_points=0):
        if self.method not in ['barnes_hut', 'exact']:
            raise ValueError("'method' must be 'barnes_hut' or 'exact'")
        if self.angle < 0.0 or self.angle > 1.0:
            raise ValueError("'angle' must be between 0.0 - 1.0")

        #<=ADDED: default to an empty dict so the **-expansion below works
        # even when no extra parameters are given
        metric_params = self.metric_params or {}

        if self.metric == "precomputed":
            if isinstance(self.init, string_types) and self.init == 'pca':
                raise ValueError("The parameter init=\"pca\" cannot be "
                                 "used with metric=\"precomputed\".")
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square distance matrix")
            if np.any(X < 0):
                raise ValueError("All distances should be positive, the "
                                 "precomputed distances given as X is not "
                                 "correct")
        if self.method == 'barnes_hut' and sp.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense '
                            'data is required for method="barnes_hut". Use '
                            'X.toarray() to convert to a dense numpy array if '
                            'the array is small enough for it to fit in '
                            'memory. Otherwise consider dimensionality '
                            'reduction techniques (e.g. TruncatedSVD)')
        else:
            X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                            dtype=[np.float32, np.float64])
        if self.method == 'barnes_hut' and self.n_components > 3:
            raise ValueError("'n_components' should be inferior to 4 for the "
                             "barnes_hut algorithm as it relies on "
                             "quad-tree or oct-tree.")
        random_state = check_random_state(self.random_state)

        if self.early_exaggeration < 1.0:
            raise ValueError("early_exaggeration must be at least 1, but is {}"
                             .format(self.early_exaggeration))

        if self.n_iter < 250:
            raise ValueError("n_iter should be at least 250")

        n_samples = X.shape[0]

        neighbors_nn = None
        if self.method == "exact":
            if self.metric == "precomputed":
                distances = X
            else:
                if self.verbose:
                    print("[t-SNE] Computing pairwise distances...")

                if self.metric == "euclidean":
                    distances = pairwise_distances(X, metric=self.metric,
                                                   squared=True,
                                                   **metric_params)  #<=ADDED
                else:
                    distances = pairwise_distances(X, metric=self.metric,
                                                   **metric_params)  #<=ADDED

                if np.any(distances < 0):
                    raise ValueError("All distances should be positive, the "
                                     "metric given is not correct")

            P = _joint_probabilities(distances, self.perplexity, self.verbose)
            assert np.all(np.isfinite(P)), "All probabilities should be finite"
            assert np.all(P >= 0), "All probabilities should be non-negative"
            assert np.all(P <= 1), ("All probabilities should be less "
                                    "or then equal to one")
        else:
            k = min(n_samples - 1, int(3. * self.perplexity + 1))

            if self.verbose:
                print("[t-SNE] Computing {} nearest neighbors...".format(k))

            knn = NearestNeighbors(algorithm='auto', n_neighbors=k,
                                   metric=self.metric,
                                   metric_params=self.metric_params)  #<=ADDED

            t0 = time()
            knn.fit(X)
            duration = time() - t0
            if self.verbose:
                print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
                    n_samples, duration))

            t0 = time()
            distances_nn, neighbors_nn = knn.kneighbors(
                None, n_neighbors=k)
            duration = time() - t0
            if self.verbose:
                print("[t-SNE] Computed neighbors for {} samples in {:.3f}s..."
                      .format(n_samples, duration))

            del knn

            if self.metric == "euclidean":
                distances_nn **= 2

            P = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                        self.perplexity, self.verbose)

        if isinstance(self.init, np.ndarray):
            X_embedded = self.init
        elif self.init == 'pca':
            pca = PCA(n_components=self.n_components, svd_solver='randomized',
                      random_state=random_state)
            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
        elif self.init == 'random':
            X_embedded = 1e-4 * random_state.randn(
                n_samples, self.n_components).astype(np.float32)
        else:
            raise ValueError("'init' must be 'pca', 'random', or "
                             "a numpy array")

        degrees_of_freedom = max(self.n_components - 1.0, 1)

        return self._tsne(P, degrees_of_freedom, n_samples,
                          X_embedded=X_embedded,
                          neighbors=neighbors_nn,
                          skip_num_points=skip_num_points)
I have marked the changes with #<=ADDED. Now try using this class instead of TSNE, like this:
pt = data.sample(frac=0.1).values  # data is assumed to be the DataFrame being embedded
tsne = MyTSNE(verbose=1, perplexity=40, n_iter=250, learning_rate=50, random_state=0,
              metric='mahalanobis',
              metric_params={'V': np.cov(pt, rowvar=False)})  # V: (n_features, n_features) covariance
tsne_results = tsne.fit_transform(pt)
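The example above uses the default method='barnes_hut', so metric_params goes to NearestNeighbors, whose Mahalanobis metric accepts the covariance as 'V'. With method='exact' the keywords are forwarded to pairwise_distances instead, which hands Mahalanobis off to SciPy, and SciPy expects the inverse covariance as 'VI'. A sketch for that path, assuming the same pt as above:

VI = np.linalg.inv(np.cov(pt, rowvar=False))  # SciPy's mahalanobis wants the inverse covariance
tsne_exact = MyTSNE(method='exact', verbose=1, perplexity=40, n_iter=250,
                    learning_rate=50, random_state=0,
                    metric='mahalanobis', metric_params={'VI': VI})
tsne_exact_results = tsne_exact.fit_transform(pt)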
Note:
The other classes I mentioned at the top validate the contents of metric_params, but I have not done that here, so make sure you pass correct parameters, otherwise you will get errors (a minimal guard is sketched below).
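If you want at least a basic sanity check, a hypothetical helper like this (nothing like the full validation the built-in estimators do) could be called at the top of _fit():

# Hypothetical guard, not part of the class above
def check_metric_params(metric_params):
    if metric_params is not None and not isinstance(metric_params, dict):
        raise TypeError("metric_params must be a dict or None, got %r"
                        % type(metric_params))
    return metric_params or {}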
You should also post the issue on the scikit-learn issues page on GitHub.