Source code for ngclearn.utils.viz.dim_reduce

import matplotlib
import matplotlib.pyplot as plt
## fallback color-map: plot_latents() uses this whenever its `cmap` argument is None
default_cmap = plt.cm.jet

import numpy as np
from sklearn.decomposition import IncrementalPCA ## sci-kit learning dependency
from sklearn.manifold import TSNE ## sci-kit learning dependency

def extract_pca_latents(vectors, batch_size=50): ## PCA mapping routine
    """
    Projects collection of K vectors (stored in a matrix) to a two-dimensional (2D)
    visualization space via principal components analysis (PCA). Note that
    if the input already has a 2D dimensionality, the original input is returned.

    Args:
        vectors: a matrix/codebook of (K x D) vectors to project

        batch_size: number of vectors to use per mini-batch of the internal
            incremental PCA (Default: 50). Integer values below 2 are raised
            to 2 (the number of extracted components); None is forwarded
            unchanged to IncrementalPCA.

    Returns:
        a matrix (K x 2) of projected vectors (to 2D space)
    """
    if vectors.shape[1] == 2:
        ## nothing to project -- hand back the very same object
        return vectors
    if batch_size is not None:
        ## same rule extract_tsne_latents applies: never use a mini-batch
        ## smaller than the number of components being extracted
        batch_size = max(2, batch_size)
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    return ipca.fit(vectors).transform(vectors)

def extract_tsne_latents(vectors, perplexity=30, n_pca_comp=32, batch_size=500): ## tSNE mapping routine
    """
    Maps a collection of K vectors (rows of a matrix) into a two-dimensional
    (2D) visualization space with t-distributed stochastic neighbor embedding
    (t-SNE). An incremental PCA pass is run first to produce an intermediate
    projection, which the final t-SNE step then reduces to 2D. If the input
    already has two columns, it is returned as-is.

    Args:
        vectors: a matrix/codebook of (K x D) vectors to project

        perplexity: the perplexity control factor for t-SNE (Default: 30)

        n_pca_comp: number of top PCA components to retain/extract before
            continuing with t-SNE dimensionality reduction (Default: 32)

        batch_size: number of vectors to use per mini-batch of the internal
            incremental PCA (Default: 500); raised to `n_pca_comp` if smaller

    Returns:
        a matrix (K x 2) of projected vectors (to 2D space)
    """
    n_features = vectors.shape[1]
    if n_features == 2:
        ## already 2D -- return the caller's object untouched
        return vectors

    print(" > Projecting latents via iPCA...")
    ## the PCA mini-batch may not be smaller than the requested component count
    pca_batch = max(batch_size, n_pca_comp)
    n_keep = n_pca_comp
    if n_features < n_keep:
        ## fewer input dimensions than requested components: fall back to
        ## (D - 2) components, but never fewer than 2
        n_keep = max(2, n_features - 2)
    reducer = IncrementalPCA(n_components=n_keep, batch_size=pca_batch)
    pca_codes = reducer.fit(vectors).transform(vectors)
    print(" PCA.lat.shape = ", pca_codes.shape)

    print(" > Finishing projection via t-SNE...")
    embedder = TSNE(n_components=2, perplexity=perplexity, verbose=1)
    return embedder.fit_transform(pca_codes)

def plot_latents(code_vectors, labels, plot_fname="2Dcode_plot.jpg", alpha=1., cmap=None):
    """
    Produces a label-overlaid (label map to distinct colors) scatterplot for
    visualizing two-dimensional latent codes (produced by either PCA or t-SNE).
    The plot is rendered with the non-interactive Agg backend and written to
    disk; the matplotlib backend that was active on entry is restored before
    returning, even if plotting or saving fails.

    Args:
        code_vectors: a matrix of shape (K x 2) with vectors to plot/visualize

        labels: label values, either of shape (K,) or (K x 1) of integer values
            or of shape (K x C) of binary one-hot encodings where C is the
            number of classes.

        plot_fname: /path/to/plot_fname.<suffix> for saving the plot to disk

        alpha: alpha intensity level to present colors in scatterplot

        cmap: custom color-map to provide (the module-level `default_cmap` is
            used if None)
    """
    print(" > Plotting 2D latent encodings...")

    lab = np.asarray(labels)
    if lab.ndim > 1:
        if lab.shape[1] > 1: ## extract integer class labels from a one-hot matrix
            lab = np.argmax(lab, axis=1)
        else: ## (K x 1) column of integer labels -> flat (K,) vector
            lab = lab.ravel()
    _cmap = default_cmap if cmap is None else cmap

    ## remember the active backend exactly once, *before* switching away from
    ## it, so that the restore below really returns to the caller's backend
    prev_backend = plt.rcParams["backend"]
    matplotlib.use('Agg') ## temporarily go in Agg plt backend for tsne plotting
    try:
        fig = plt.figure(figsize=(8, 6))
        try:
            plt.scatter(code_vectors[:, 0], code_vectors[:, 1], c=lab,
                        cmap=_cmap, alpha=alpha)
            plt.colorbar()
            plt.grid()
            plt.savefig("{0}".format(plot_fname), dpi=300)
        finally:
            ## close (not merely clear) the figure so repeated calls do not
            ## accumulate open figure objects
            plt.close(fig)
    finally:
        matplotlib.use(prev_backend) ## return back to auto-selected plt backend for system
