import numpy as np
import scipy.sparse
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from sklearn.utils.extmath import randomized_svdThe best low-rank approximation can be computed by SVD.
Theorem: Let r < \text{rank}(A), A_r = U_r \Sigma_r V_r^*. Then
\min_{\text{rank}(B)=r} \|A - B\|_2 = \|A - A_r\|_2 = \sigma_{r+1}.
The same holds for \|\cdot\|_F, but \|A - A_r\|_F = \sqrt{\sigma_{r+1}^2 + \dots + \sigma_{\min (n,m)}^2}.
Low-rank and sparse decomposition
A_r = U_r \Sigma_r V_r^* S = A - A_r Ax = A_r x + Sx = U_r \Sigma_r V_r^*x + Sx
For A: n \times n and rank truncation r<n:
Complexity of Ax: \mathcal{O}(n^2)
Complexity of A_rx: \mathcal{O}(nr)
Complexity of Sx: \mathcal{O}(nnz(S))
It becomes effective with:
r<<n
nnz(S)<<n^2
def decompose_matrix_with_sparse_correction_optimized(A, rank, threshold=1e-3):
U, sigma, Vt = randomized_svd(A, n_components=rank, n_iter=5, random_state=None)
U_r = U[:, :rank]
Sigma_r = sigma[:rank]
Vt_r = Vt[:rank, :]
B = (U_r * Sigma_r) @ Vt_r
S_dense = A - B
S_dense[np.abs(S_dense) < threshold] = 0
S_sparse = scipy.sparse.csr_matrix(S_dense)
return U_r, Sigma_r, Vt_r, S_sparse
def optimized_multiply(U, Sigma, Vt, S, x):
temp = Vt @ x
temp = Sigma * temp
Bx = U @ temp
Sx = S @ x
return Bx + Sx# Parameters
sizes = np.arange(200, 4000, 200)
threshold = 0.6
max_rank = 0.05mses = []
exact_times = []
approx_times = []
svd_times = []for i, n in enumerate(sizes):
A = np.random.rand(n, n)
x = np.random.rand(n)
rank = int(n*max_rank)
start_time = time.time()
U_r, Sigma_r, Vt_r, S_sparse = decompose_matrix_with_sparse_correction_optimized(A, rank, threshold)
svd_times.append(time.time() - start_time)
# Measure time for exact multiplication
start_time = time.time()
exact_result = A @ x
exact_times.append(time.time() - start_time)
# Measure time for decomposition-based multiplication
start_time = time.time()
approx_result = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_times.append(time.time() - start_time)
mse = np.mean(((exact_result - approx_result)/exact_result) ** 2)
mses.append(mse)
print("Step ", i, "of", len(sizes)," sparsity: ", S_sparse.nnz/n/n)plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.plot(sizes, mses, marker='o', label="MSE vs Rank")
plt.xlabel("Size")
plt.ylabel("Mean Squared Error (MSE)")
plt.title("MSE Growth as Size Grows")
plt.grid(True)
plt.legend()
# Plot Time Comparison
plt.subplot(1, 2, 2)
plt.plot(sizes, exact_times, label="Exact Multiplication Time", marker='o')
plt.plot(sizes, approx_times, label="Approximate Multiplication Time", marker='x')
#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
plt.xlabel("Size")
plt.ylabel("Time (seconds)")
plt.title("Time Comparison: Exact vs Approximate Multiplication")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()n = 3000
threshold = 0.5
max_ranks = np.arange(0.0, 1.0, 0.05)mses = []
approx_times = []
svd_times = []A = np.random.rand(n, n)
x = np.random.rand(n)
start_time = time.time()
exact_result = A @ x
exact_time = time.time() - start_time
U, Sigma, Vt, S = decompose_matrix_with_sparse_correction_optimized(A, n, 0)
for i, max_rank in enumerate(max_ranks):
rank = max(int(n*max_rank), 1)
U_r = U[:, :rank]
Sigma_r = Sigma[:rank]
Vt_r = Vt[:rank, :]
B = (U_r * Sigma_r) @ Vt_r
S_dense = A - B
S_dense[np.abs(S_dense) < threshold] = 0
S_sparse = scipy.sparse.csr_matrix(S_dense)
# Measure time for decomposition-based multiplication
start_time = time.time()
approx_result = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_times.append(time.time() - start_time)
mse = np.mean(((exact_result - approx_result)/exact_result) ** 2)
mses.append(mse)
print("Step ", i, "of", len(max_ranks)," sparsity: ", S_sparse.nnz/n/n)plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.plot(max_ranks, mses, marker='o', label="MSE vs Rank")
plt.xlabel("Size")
plt.ylabel("Mean Squared Error (MSE)")
plt.title("MSE Growth as Rank is Reduced")
plt.grid(True)
plt.legend()
# Plot Time Comparison
plt.subplot(1, 2, 2)
plt.plot(max_ranks, np.ones(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(max_ranks, approx_times, label="Approximate Multiplication Time", marker='x')
#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
plt.xlabel("Size")
plt.ylabel("Time (seconds)")
plt.title("Time Comparison: Exact vs Approximate Multiplication")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()n = 3200
thresholds = np.arange(0.0, 1.0, 0.05)
rank = 200mses = []
approx_times = []
svd_times = []
A = np.random.rand(n, n)
x = np.random.rand(n)
start_time = time.time()
exact_result = A @ x
exact_time = time.time() - start_time
U_r, Sigma_r, Vt_r, S = decompose_matrix_with_sparse_correction_optimized(A, rank, 0)
for i, threshold in enumerate(thresholds):
S_sparse = S.toarray()
S_sparse[np.abs(S_sparse) < threshold] = 0
S_sparse = scipy.sparse.csr_matrix(S_sparse)
# Measure time for decomposition-based multiplication
start_time = time.time()
approx_result = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_times.append(time.time() - start_time)
mse = np.mean(((exact_result - approx_result)/exact_result) ** 2)
mses.append(mse)
print("Step ", i, "of", len(max_ranks)," sparsity: ", S_sparse.nnz/n/n)plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.plot(thresholds, mses, marker='o', label="MSE vs Rank")
plt.xlabel("Size")
plt.ylabel("Mean Squared Error (MSE)")
plt.title("MSE Growth as Density is Reduced")
plt.grid(True)
plt.legend()
# Plot Time Comparison
plt.subplot(1, 2, 2)
plt.plot(thresholds, np.ones(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(thresholds, approx_times, label="Approximate Multiplication Time", marker='x')
#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
plt.xlabel("Size")
plt.ylabel("Time (seconds)")
plt.title("Time Comparison: Exact vs Approximate Multiplication")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()mses = []
approx_times = []
svd_times = []
A = np.random.rand(n, n)
x = np.random.rand(n)
start_time = time.time()
exact_result = A @ x
exact_time = time.time() - start_time
U_r, Sigma_r, Vt_r, S = decompose_matrix_with_sparse_correction_optimized(A, rank, 0)
for i, threshold in enumerate(thresholds):
S_sparse = S.toarray()
S_sparse[np.abs(S_sparse) < threshold] = 0
#S_sparse = scipy.sparse.csr_matrix(S_sparse)
# Measure time for decomposition-based multiplication
start_time = time.time()
approx_result = optimized_multiply(U_r, Sigma_r, Vt_r, S_sparse, x)
approx_times.append(time.time() - start_time)
mse = np.mean(((exact_result - approx_result)/exact_result) ** 2)
mses.append(mse)
print("Step ", i, "of", len(max_ranks))plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.plot(thresholds, mses, marker='o', label="MSE vs Rank")
plt.xlabel("Size")
plt.ylabel("Mean Squared Error (MSE)")
plt.title("MSE Growth as Density is Reduced")
plt.grid(True)
plt.legend()
# Plot Time Comparison
plt.subplot(1, 2, 2)
plt.plot(thresholds, np.ones(len(approx_times))*exact_time, label="Exact Multiplication Time", marker='o')
plt.plot(thresholds, approx_times, label="Approximate Multiplication Time", marker='x')
#plt.plot(sizes, svd_times, label="SVD Time", marker='o')
plt.xlabel("Size")
plt.ylabel("Time (seconds)")
plt.title("Time Comparison: Exact vs Approximate Multiplication")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()