Source

Load the dataset

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_lfw_people #lol
from sklearn.preprocessing import StandardScaler

# Load the LFW faces (called with min_faces_per_person=100 and resize=0.4)
# and pull out the pieces used by the rest of the notebook.
dataset = fetch_lfw_people(min_faces_per_person=100, resize=0.4)
A = dataset['data']                # one row per sample
labels = dataset['target']         # integer class index of every sample
classes = dataset['target_names']  # lookup table: class index -> name
# Human-readable name for every sample, in the same order as `labels`.
label_names = np.array([classes[class_idx] for class_idx in labels])
print(
    '🤖: Dataset contains {} points in {}-dimensional space'.format(*A.shape)
)

def plot_gallery(dataset, n_row=3, n_col=4):
    """Plot the first ``n_row * n_col`` portraits of ``dataset`` in a grid.

    Every subplot shows one image in grayscale, without axis ticks, titled
    with the entry of ``dataset["target_names"]`` selected by that image's
    target. If the dataset holds fewer images than the grid has cells, the
    remaining cells are simply left empty.

    Args:
        dataset: Mapping-like object with the keys ``"images"`` (array of
            shape ``(n_samples, h, w)``), ``"target"`` and
            ``"target_names"``.
        n_row: Number of rows in the grid.
        n_col: Number of columns in the grid.
    """
    images, targets = dataset["images"], dataset["target"]
    target_names = dataset["target_names"]
    _, h, w = images.shape
    # Never index past the last image when the grid is larger than the data.
    n_shown = min(n_row * n_col, len(images))

    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_shown):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        # Look the name up only for the images that are actually drawn.
        plt.title(target_names[targets[i]], size=12)
        plt.xticks(())
        plt.yticks(())


# Show the first portraits of the loaded dataset with the default 3x4 grid.
plot_gallery(dataset)
🤖: Dataset contains 1140 points in 1850-dimensional space

Task: normalize the data to have zero mean

### YOUR CODE HERE
# Exercise placeholder: replace `...` with the zero-mean version of `A`.
A_std = ...

Task: Calculate the SVD of the normalized matrix

$$A_{std} = U \Sigma V^\top$$

### YOUR CODE HERE
# Exercise placeholder: replace `...` with the three SVD factors of `A_std`
# (`u`, `sigmas`, `vt` stand for U, Sigma and V^T in A_std = U Sigma V^T).
# Until it is replaced this line raises TypeError: Ellipsis cannot be unpacked.
u, sigmas, vt = ...

Task: plot eigenfaces

# NOTE: the default values below are evaluated once, when this `def` is
# executed, so the function keeps whatever `dataset`, `u`, `sigmas` and `vt`
# were bound to at that moment. Re-run this definition after recomputing them.
def plot_eigenfaces(dataset=dataset, u=u, sigmas=sigmas, vt=vt, n_row=3, n_col=4):
    """Plot the first ``n_row * n_col`` entries of ``projections`` as eigenfaces.

    Each entry is reshaped to ``(h, w)``, the last two dimensions of
    ``dataset.images``, drawn in grayscale without axis ticks and titled
    ``"Eigenface #k"`` with ``k`` starting at 1.

    Args:
        dataset: Object exposing ``images`` with a three-dimensional shape.
        u, sigmas, vt: SVD factors made available to the exercise code; the
            template lines shown here do not use them yet.
        n_row: Number of rows in the grid.
        n_col: Number of columns in the grid.
    """
    # Only h and w are used below; n_samples is unpacked but not read.
    n_samples, h, w = dataset.images.shape
    ### YOUR CODE HERE
    # Exercise placeholder: `projections[i]` must be reshapeable to (h, w) for
    # every i < n_row * n_col. While it is still `...`, the indexing in the
    # loop below raises TypeError.
    projections = ...
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)

        plt.imshow(projections[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(f"Eigenface #{i+1}", size=12)
        # Empty tuples remove the tick marks from both axes.
        plt.xticks(())
        plt.yticks(())

# Draw the eigenfaces using the definition-time defaults.
plot_eigenfaces()

Task: plot reconstructions

# NOTE: the default values below are evaluated once, when this `def` is
# executed, so the function keeps whatever `dataset`, `u`, `sigmas` and `vt`
# were bound to at that moment. Re-run this definition after recomputing them.
def plot_projections(rank = 20, dataset=dataset, u=u, sigmas=sigmas, vt=vt, n_row=3, n_col=4):
    """Plot the first ``n_row * n_col`` entries of ``reconstructions``.

    Prints ``"Rank {rank} compression"``, then draws every entry reshaped to
    ``(h, w)``, the last two dimensions of ``dataset.images``, in grayscale
    without axis ticks. Each subplot is titled with the entry of
    ``dataset["target_names"]`` selected by the matching target.

    Args:
        rank: Value printed in the heading; presumably the number of
            components the exercise code should keep (to be confirmed by
            whoever fills in the placeholders).
        dataset: Object exposing ``images`` as an attribute and ``"images"``,
            ``"target"`` and ``"target_names"`` as keys.
        u, sigmas, vt: SVD factors made available to the exercise code; the
            template lines shown here do not use them yet.
        n_row: Number of rows in the grid.
        n_col: Number of columns in the grid.
    """
    # Only h and w are used below; n_samples is unpacked but not read.
    n_samples, h, w = dataset.images.shape
    ### YOUR CODE HERE
    # Exercise placeholders: `reconstructions[i]` must be reshapeable to
    # (h, w) for every i < n_row * n_col. `projections` is not read by the
    # template lines below. While `reconstructions` is still `...`, the
    # indexing in the loop raises TypeError.
    projections = ...
    reconstructions = ...

    # `images` is bound here but not read by the template lines below.
    images, titles = dataset["images"], dataset["target"]
    # Translate every target into the corresponding name.
    titles = [dataset["target_names"][title] for title in titles]
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    print(f"Rank {rank} compression")
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        ### YOUR CODE HERE
        plt.imshow(reconstructions[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(f"{titles[i]}", size=12)
        # Empty tuples remove the tick marks from both axes.
        plt.xticks(())
        plt.yticks(())

# Draw the reconstructions using the definition-time defaults (rank 20).
plot_projections()
Rank 20 compression

Task: plot the individual and cumulative explained variance of the principal components

### YOUR CODE HERE
# Exercise placeholder: the total variance is the sum of the *squared*
# singular values, so that the percentages computed below add up to 100.
total_variance = ...
# The variance captured along a principal component is proportional to the
# square of its singular value, not to the singular value itself.
variance_explained = [
    (sigma ** 2 / total_variance) * 100
    for sigma in sorted(sigmas, reverse=True)
]
cumulative_variance_explained = np.cumsum(variance_explained)

### YOUR CODE HERE
# Exercise placeholder: the number of singular values (one bar per component).
n_sigmas = ...
# Bar i is centred at i + 0.5.
xs = [0.5 + i for i in range(n_sigmas)]
plt.bar(xs, variance_explained, alpha=0.5, align='center',
        label='Individual explained variance')
plt.step(xs, cumulative_variance_explained, where='mid',
         label='Cumulative explained variance')
plt.ylabel('Explained variance')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.show()