Source code for ngclearn.utils.viz.dim_reduce
import matplotlib
import matplotlib.pyplot as plt
default_cmap = plt.cm.jet
import numpy as np
from sklearn.decomposition import IncrementalPCA ## sci-kit learning dependency
from sklearn.manifold import TSNE ## sci-kit learning dependency
[docs]
def extract_pca_latents(vectors, batch_size=50): ## PCA mapping routine
    """
    Projects collection of K vectors (stored in a matrix) to a two-dimensional (2D)
    visualization space via (incremental) principal components analysis (PCA).
    Note that if the input already has a 2D dimensionality, the original input
    is returned as-is (no copy is made).

    Args:
        vectors: a matrix/codebook of (K x D) vectors to project

        batch_size: number of vectors to use per iteration of the online
            internal PCA; values below 2 are raised to 2 since two
            components are extracted (Default: 50)

    Returns:
        a matrix (K x 2) of projected vectors (to 2D space)
    """
    z_dim = vectors.shape[1]
    if z_dim == 2: ## already in 2D space, nothing to project
        return vectors
    ## a batch must hold at least as many vectors as extracted components
    batch_size = max(2, batch_size)
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    ipca.fit(vectors)
    return ipca.transform(vectors)
[docs]
def extract_tsne_latents(vectors, perplexity=30, n_pca_comp=32, batch_size=500): ## tSNE mapping routine
    """
    Projects collection of K vectors (stored in a matrix) to a two-dimensional (2D) visualization space via the
    t-distributed stochastic neighbor embedding algorithm (t-SNE). This algorithm also uses PCA to produce an
    intermediate projection to speed up the t-SNE final mapping step. Note that if the input already has a 2D
    dimensionality, the original input is returned as-is (no copy is made).

    Args:
        vectors: a matrix/codebook of (K x D) vectors to project

        perplexity: the perplexity control factor for t-SNE (Default: 30)

        n_pca_comp: number of PCA top components (sorted by eigen-values) to retain/extract before continuing
            with t-SNE dimensionality reduction; if D is smaller than this value, D - 2 components are
            used instead, and the count is further capped at K and floored at 2 (Default: 32)

        batch_size: number of sampled embedding vectors to use per iteration of online internal PCA; raised
            to `n_pca_comp` if it is smaller than that (Default: 500)

    Returns:
        a matrix (K x 2) of projected vectors (to 2D space)
    """
    n_vectors = vectors.shape[0]
    z_dim = vectors.shape[1]
    if z_dim == 2: ## already in 2D space, nothing to project
        return vectors

    print(" > Projecting latents via iPCA...")
    n_comp = n_pca_comp
    ## a batch must hold at least as many vectors as requested components
    batch_size = max(batch_size, n_comp)
    if z_dim < n_comp: ## fewer features than requested components
        n_comp = z_dim - 2
    ## iPCA cannot extract more components than there are vectors to fit on;
    ## without this cap, small codebooks (K < n_pca_comp) make the fit raise
    n_comp = min(n_comp, n_vectors)
    n_comp = max(2, n_comp)
    ipca = IncrementalPCA(n_components=n_comp, batch_size=batch_size)
    ipca.fit(vectors)
    z_2D = ipca.transform(vectors)
    print(" PCA.lat.shape = ",z_2D.shape)
    print(" > Finishing projection via t-SNE...")
    z_2D = TSNE(n_components=2,perplexity=perplexity, verbose=1).fit_transform(z_2D)
    return z_2D
[docs]
def plot_latents(code_vectors, labels, plot_fname="2Dcode_plot.jpg", alpha=1., cmap=None):
    """
    Produces a label-overlaid (label map to distinct colors) scatterplot for visualizing two-dimensional latent codes
    (produced by either PCA or t-SNE). The plot is rendered with the Agg backend and saved to disk; the backend
    that was active on entry is restored afterwards, even if plotting fails.

    Args:
        code_vectors: a matrix of shape (K x 2) with vectors to plot/visualize

        labels: label values, either of shape (K x 1) (or a flat vector of
            length K) of integer values or of shape (K x C) of binary one-hot
            encodings where C is the number of classes.

        plot_fname: /path/to/plot_fname.<suffix> for saving the plot to disk

        alpha: alpha intensity level to present colors in scatterplot

        cmap: custom color-map to provide (if None, the module's default
            color-map is used)
    """
    ## record the active backend exactly once, BEFORE switching -- reading it
    ## again after the switch would yield Agg and make the restore a no-op
    curr_backend = plt.rcParams["backend"]
    matplotlib.use('Agg') ## temporarily go in Agg plt backend for tsne plotting
    fig = None
    try:
        print(" > Plotting 2D latent encodings...")
        lab = np.asarray(labels)
        if lab.ndim > 1:
            if lab.shape[1] > 1: ## extract integer class labels from a one-hot matrix
                lab = np.argmax(lab, 1)
            else: ## (K x 1) column of integer labels -> flat vector of length K
                lab = lab.ravel()
        _cmap = default_cmap if cmap is None else cmap
        fig = plt.figure(figsize=(8, 6))
        plt.scatter(code_vectors[:, 0], code_vectors[:, 1], c=lab, cmap=_cmap, alpha=alpha)
        plt.colorbar()
        plt.grid()
        plt.savefig(str(plot_fname), dpi=300)
    finally:
        if fig is not None:
            plt.close(fig) ## release the figure so repeated calls do not pile up open figures
        matplotlib.use(curr_backend) ## return back to auto-selected plt backend for system