import numpy as np
import scipy.sparse
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from sklearn.utils.extmath import randomized_svd
The best low-rank approximation can be computed by SVD.
Theorem: Let r < \text{rank}(A), A_r = U_r \Sigma_r V_r^*. Then
\min_{\text{rank}(B)=r} \|A - B\|_2 = \|A - A_r\|_2 = \sigma_{r+1}.
The same holds for \|\cdot\|_F, but \|A - A_r\|_F = \sqrt{\sigma_{r+1}^2 + \dots + \sigma_{\min (n,m)}^2}.
Low-rank and sparse decomposition
A_r = U_r \Sigma_r V_r^* S = A - A_r Ax = A_r x + Sx = U_r \Sigma_r V_r^*x + Sx
For A: n \times n and rank truncation r<n:
Complexity of Ax: \mathcal{O}(n^2)
Complexity of A_rx: \mathcal{O}(nr)
Complexity of Sx: \mathcal{O}(nnz(S))
It becomes effective with:
r<<n
nnz(S)<<n^2
def decompose_matrix_with_sparse_correction_optimized(A, rank, threshold=1e-3):
= randomized_svd(A, n_components=rank, n_iter=5, random_state=None)
U, sigma, Vt = U[:, :rank]
U_r = sigma[:rank]
Sigma_r = Vt[:rank, :]
Vt_r = (U_r * Sigma_r) @ Vt_r
B = A - B
S_dense abs(S_dense) < threshold] = 0
S_dense[np.= scipy.sparse.csr_matrix(S_dense)
S_sparse return U_r, Sigma_r, Vt_r, S_sparse
def optimized_multiply(U, Sigma, Vt, S, x):
= Vt @ x
temp = Sigma * temp
temp = U @ temp
Bx = S @ x
Sx return Bx + Sx
# Parameters
= np.arange(200, 4000, 200)
sizes = 0.6
threshold = 0.05 max_rank
= []
mses = []
exact_times = []
approx_times = [] svd_times
for i, n in enumerate(sizes):
= np.random.rand(n, n)
A = np.random.rand(n)
x = int(n*max_rank)
rank = time.time()
start_time = decompose_matrix_with_sparse_correction_optimized(A, rank, threshold)
U_r, Sigma_r, Vt_r, S_sparse
- start_time)
svd_times.append(time.time()
# Measure time for exact multiplication
= time.time()
start_time = A @ x
exact_result - start_time)
exact_times.append(time.time()
# Measure time for decomposition-based multiplication
= time.time()
start_time = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_result - start_time)
approx_times.append(time.time()
= np.mean(((exact_result - approx_result)/exact_result) ** 2)
mse
mses.append(mse)
print("Step ", i, "of", len(sizes)," sparsity: ", S_sparse.nnz/n/n)
=(14, 6))
plt.figure(figsize
1, 2, 1)
plt.subplot(='o', label="MSE vs Rank")
plt.plot(sizes, mses, marker"Size")
plt.xlabel("Mean Squared Error (MSE)")
plt.ylabel("MSE Growth as Size Grows")
plt.title(True)
plt.grid(
plt.legend()
# Plot Time Comparison
1, 2, 2)
plt.subplot(="Exact Multiplication Time", marker='o')
plt.plot(sizes, exact_times, label="Approximate Multiplication Time", marker='x')
plt.plot(sizes, approx_times, label#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
"Size")
plt.xlabel("Time (seconds)")
plt.ylabel("Time Comparison: Exact vs Approximate Multiplication")
plt.title(True)
plt.grid(
plt.legend()
plt.tight_layout() plt.show()
= 3000
n = 0.5
threshold = np.arange(0.0, 1.0, 0.05) max_ranks
= []
mses = []
approx_times = [] svd_times
= np.random.rand(n, n)
A = np.random.rand(n)
x = time.time()
start_time = A @ x
exact_result = time.time() - start_time
exact_time
= decompose_matrix_with_sparse_correction_optimized(A, n, 0)
U, Sigma, Vt, S
for i, max_rank in enumerate(max_ranks):
= max(int(n*max_rank), 1)
rank = U[:, :rank]
U_r = Sigma[:rank]
Sigma_r = Vt[:rank, :]
Vt_r = (U_r * Sigma_r) @ Vt_r
B = A - B
S_dense abs(S_dense) < threshold] = 0
S_dense[np.= scipy.sparse.csr_matrix(S_dense)
S_sparse
# Measure time for decomposition-based multiplication
= time.time()
start_time = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_result - start_time)
approx_times.append(time.time()
= np.mean(((exact_result - approx_result)/exact_result) ** 2)
mse
mses.append(mse)
print("Step ", i, "of", len(max_ranks)," sparsity: ", S_sparse.nnz/n/n)
=(14, 6))
plt.figure(figsize
1, 2, 1)
plt.subplot(='o', label="MSE vs Rank")
plt.plot(max_ranks, mses, marker"Size")
plt.xlabel("Mean Squared Error (MSE)")
plt.ylabel("MSE Growth as Rank is Reduced")
plt.title(True)
plt.grid(
plt.legend()
# Plot Time Comparison
1, 2, 2)
plt.subplot(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(max_ranks, np.ones(="Approximate Multiplication Time", marker='x')
plt.plot(max_ranks, approx_times, label#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
"Size")
plt.xlabel("Time (seconds)")
plt.ylabel("Time Comparison: Exact vs Approximate Multiplication")
plt.title(True)
plt.grid(
plt.legend()
plt.tight_layout() plt.show()
= 3200
n = np.arange(0.0, 1.0, 0.05)
thresholds = 200 rank
= []
mses = []
approx_times = []
svd_times
= np.random.rand(n, n)
A = np.random.rand(n)
x = time.time()
start_time = A @ x
exact_result = time.time() - start_time
exact_time = decompose_matrix_with_sparse_correction_optimized(A, rank, 0)
U_r, Sigma_r, Vt_r, S
for i, threshold in enumerate(thresholds):
= S.toarray()
S_sparse abs(S_sparse) < threshold] = 0
S_sparse[np.= scipy.sparse.csr_matrix(S_sparse)
S_sparse
# Measure time for decomposition-based multiplication
= time.time()
start_time = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_result - start_time)
approx_times.append(time.time()
= np.mean(((exact_result - approx_result)/exact_result) ** 2)
mse
mses.append(mse)
print("Step ", i, "of", len(max_ranks)," sparsity: ", S_sparse.nnz/n/n)
=(14, 6))
plt.figure(figsize
1, 2, 1)
plt.subplot(='o', label="MSE vs Rank")
plt.plot(thresholds, mses, marker"Size")
plt.xlabel("Mean Squared Error (MSE)")
plt.ylabel("MSE Growth as Density is Reduced")
plt.title(True)
plt.grid(
plt.legend()
# Plot Time Comparison
1, 2, 2)
plt.subplot(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(thresholds, np.ones(="Approximate Multiplication Time", marker='x')
plt.plot(thresholds, approx_times, label#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
"Size")
plt.xlabel("Time (seconds)")
plt.ylabel("Time Comparison: Exact vs Approximate Multiplication")
plt.title(True)
plt.grid(
plt.legend()
plt.tight_layout() plt.show()
= []
mses = []
approx_times = []
svd_times
= np.random.rand(n, n)
A = np.random.rand(n)
x = time.time()
start_time = A @ x
exact_result = time.time() - start_time
exact_time = decompose_matrix_with_sparse_correction_optimized(A, rank, 0)
U_r, Sigma_r, Vt_r, S
for i, threshold in enumerate(thresholds):
= S.toarray()
S_sparse abs(S_sparse) < threshold] = 0
S_sparse[np.#S_sparse = scipy.sparse.csr_matrix(S_sparse)
# Measure time for decomposition-based multiplication
= time.time()
start_time = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_result - start_time)
approx_times.append(time.time()
= np.mean(((exact_result - approx_result)/exact_result) ** 2)
mse
mses.append(mse)
print("Step ", i, "of", len(max_ranks))
=(14, 6))
plt.figure(figsize
1, 2, 1)
plt.subplot(='o', label="MSE vs Rank")
plt.plot(thresholds, mses, marker"Size")
plt.xlabel("Mean Squared Error (MSE)")
plt.ylabel("MSE Growth as Density is Reduced")
plt.title(True)
plt.grid(
plt.legend()
# Plot Time Comparison
1, 2, 2)
plt.subplot(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(thresholds, np.ones(="Approximate Multiplication Time", marker='x')
plt.plot(thresholds, approx_times, label#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
"Size")
plt.xlabel("Time (seconds)")
plt.ylabel("Time Comparison: Exact vs Approximate Multiplication")
plt.title(True)
plt.grid(
plt.legend()
plt.tight_layout() plt.show()