PCA dimensionality reduction and reconstruction of two-dimensional data

Beijing Institute of Technology | Ming-Jian Li

The following Python code is companion code for the course Artificial Intelligence and Simulation Science. It generates a noisy 1000 × 1000 surface, uses PCA (Principal Component Analysis) to reduce it from 1000 features to only 20 principal components, and then reconstructs the original-sized surface for visualization.


x
1
import numpy as np
2
import matplotlib.pyplot as plt
3
from sklearn.decomposition import PCA
4
from mpl_toolkits.mplot3d import Axes3D
5

6
# ---------------------------------------------------------------------------
# Synthetic test surface: an n-by-n grid over [-3, 3] x [-3, 3].
# ---------------------------------------------------------------------------
n = 1000
x = np.linspace(-3, 3, n)
y = np.linspace(-3, 3, n)
X, Y = np.meshgrid(x, y)

# Dominant feature: one Gaussian peak of height 1.2 centred on the origin.
Z_main = 1.2 * np.exp(-(X**2 + Y**2) / 0.6)

# Background: low-amplitude sinusoidal ripple ...
Z_base = 0.05 * np.sin(2 * X + 3 * Y) + 0.03 * np.cos(4 * X - 2 * Y)

# ... plus a handful of randomly placed Gaussian bumps (negative heights
# produce dents).  The seed makes the surface reproducible; the four draws
# per bump must stay in this order or the random stream changes.
np.random.seed(10)
n_bumps = 6
for _ in range(n_bumps):
    xm = np.random.uniform(-2, 2)      # bump centre, x
    ym = np.random.uniform(-2, 2)      # bump centre, y
    hm = np.random.uniform(-0.2, 0.7)  # bump height
    wm = np.random.uniform(0.2, 0.8)   # bump width parameter
    Z_base += hm * np.exp(-((X - xm)**2 + (Y - ym)**2) / wm)

Z_clean = Z_main + Z_base

# Additive zero-mean Gaussian noise scaled to a fixed signal-to-noise ratio:
# noise power = signal power / 10^(SNR_dB / 10), i.e. 1/100 of it at 20 dB.
snr_db = 20
sig_power = np.mean(Z_clean**2)
noise_power = sig_power / (10**(snr_db / 10))
noise = np.random.normal(0, np.sqrt(noise_power), Z_clean.shape)
Z_orig = Z_clean + noise
28

29
# PCA treats each row of the noisy surface as one sample and each column as
# one feature, keeping only the k leading principal components.
k = 20
data = Z_orig
pca = PCA(n_components=k)

# Project onto the k components, then map the scores back to the original
# feature space to obtain the low-rank reconstruction.
Z_low = pca.fit_transform(data)
Z_recon = pca.inverse_transform(Z_low)
34

35
def plot_surface(Z, title, cmap='viridis', rstride=None, cstride=None):
    """Draw a 2-D array as a 3-D surface and return the new figure.

    The surface is plotted against integer column/row indices of ``Z``,
    not against physical coordinates.

    Parameters
    ----------
    Z : 2-D array
        Heights to plot; ``Z.shape`` must be ``(rows, cols)``.
    title : str
        Title shown above the axes.
    cmap : str, optional
        Matplotlib colormap name.
    rstride, cstride : int or None, optional
        Row / column sampling step forwarded to ``Axes3D.plot_surface``.
        ``None`` (the default) picks a step that keeps roughly 100 samples
        along that axis and never drops below 1: a 1000-sample axis gets
        the step of 10 used previously, while a 20-sample axis is drawn at
        full resolution instead of being decimated to three samples.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure (not shown; call ``plt.show()``).
    """
    n_rows, n_cols = Z.shape
    if rstride is None:
        rstride = max(1, n_rows // 100)
    if cstride is None:
        cstride = max(1, n_cols // 100)

    fig = plt.figure(figsize=(6, 5))
    ax = fig.add_subplot(111, projection='3d')

    # Index grids: columns along the x-axis, rows along the y-axis.
    col_idx, row_idx = np.meshgrid(np.arange(n_cols), np.arange(n_rows))
    ax.plot_surface(col_idx, row_idx, Z, cmap=cmap,
                    rstride=rstride, cstride=cstride, alpha=1)
    ax.set_title(title, fontsize=12)
    fig.tight_layout()
    return fig
45

46
# Figure 1: the noisy input surface.
fig1 = plot_surface(Z_orig, title='1. Original data (1000×1000)')

# Figure 2: PCA scores, restricted to the first 20 rows (samples) so the
# plotted patch is 20 x 20.
latent_patch = Z_low[:20, :20]
fig2 = plot_surface(latent_patch, title='2. Latent variables', cmap='plasma')

# Figure 3: the surface rebuilt from the retained components.
fig3 = plot_surface(Z_recon, title='3. Reconstructed data (1000×1000)')

# Open all three figure windows.
plt.show()

The result is as follows.