%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
iris = load_iris()
y = iris.target
x = iris.data
print(x.shape)
(150, 4)
pca = PCA(n_components=2)
pca = pca.fit(x)
x_dr = pca.transform(x)
print(x_dr.shape)
(150, 2)
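# Side note (a minimal sketch, not part of the original run): the separate
# fit/transform calls above can be collapsed into a single fit_transform call,
# which returns the same 2-component projection as x_dr.
x_dr2 = PCA(n_components=2).fit_transform(x)
print(x_dr2.shape)  # (150, 2)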
colors = ["red", "black", "orange"]
plt.figure()
for i in range(3):
    plt.scatter(x_dr[y == i, 0],
                x_dr[y == i, 1],
                alpha=0.7,
                c=colors[i],
                label=iris.target_names[i])
plt.legend()
plt.title("PCA of IRIS datasets")
plt.show()
pca_line = PCA().fit(x)
plt.plot([1, 2, 3, 4], np.cumsum(pca_line.explained_variance_ratio_))
plt.xticks([1, 2, 3, 4])
plt.xlabel("number of components after dimension reduction")
plt.ylabel("cumulative explained variance ratio")
plt.show()
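# Sketch of how the curve above is typically used: pick the smallest number of
# components whose cumulative ratio clears a chosen threshold (0.95 here is an
# arbitrary example value).
cum_ratio = np.cumsum(pca_line.explained_variance_ratio_)
n_keep = int(np.argmax(cum_ratio >= 0.95) + 1)
print(n_keep)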
pca_line.explained_variance_ratio_
array([0.92461872, 0.05306648, 0.01710261, 0.00521218])
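# Sanity check (sketch): each ratio is that component's explained variance divided
# by the total variance, so with no components dropped the ratios sum to 1.
evr = pca_line.explained_variance_ / pca_line.explained_variance_.sum()
print(np.allclose(evr, pca_line.explained_variance_ratio_))  # True
print(pca_line.explained_variance_ratio_.sum())              # 1.0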
pca_mle = PCA(n_components="mle")
pca_mle = pca_mle.fit(x)
x_mle = pca_mle.transform(x)
print(x_mle.shape)
(150, 3)
pca_mle.explained_variance_ratio_.sum()
0.9947878161267246
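# The number of components selected by "mle" is stored on the fitted estimator
# (sketch); it matches the (150, 3) shape printed above.
print(pca_mle.n_components_)              # 3
print(pca_mle.explained_variance_ratio_)  # ratios of the 3 retained components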
pca_f = PCA(n_components=0.97, svd_solver="full")
pca_f = pca_f.fit(x)
x_f = pca_f.transform(x)
pca_f.explained_variance_ratio_
array([0.92461872, 0.05306648])
pca_f.components_
array([[ 0.36138659, -0.08452251,  0.85667061,  0.3582892 ],
       [ 0.65658877,  0.73016143, -0.17337266, -0.07548102]])
x.shape
(150, 4)
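# Verification sketch: transform() is centering followed by projection onto the
# rows of components_, so x_f can be reproduced by hand.
x_manual = (x - pca_f.mean_) @ pca_f.components_.T
print(np.allclose(x_manual, x_f))  # True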
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(data_home=".", min_faces_per_person=60)
print("数据: {}".format(faces.images.shape))
print("特征矩阵: {}".format(faces.data.shape))
数据: (1348, 62, 47) 特征矩阵: (1348, 2914)
fig, axes = plt.subplots(4, 5,
                         figsize=(8, 4),
                         subplot_kw={"xticks": [], "yticks": []})  # hide axis ticks
for i, ax in enumerate(axes.flat):
    ax.imshow(faces.images[i, :, :],
              cmap="gray")  # grayscale colormap
fig
X = faces.data
pca = PCA(n_components=150).fit(X)
pca.components_[:3]
array([[-0.00579721, -0.00595371, -0.00615771, ..., -0.01000111,
        -0.00901092, -0.00813917],
       [ 0.01708354,  0.01623676,  0.01622024, ..., -0.03474263,
        -0.03416986, -0.03298327],
       [-0.01833663, -0.01670169, -0.01557038, ..., -0.03540288,
        -0.03147691, -0.02929779]], dtype=float32)
V = pca.components_
print(V.shape)
(150, 2914)
fig, axes = plt.subplots(3, 8,
                         figsize=(8, 4),
                         subplot_kw={"xticks": [], "yticks": []})
for i, ax in enumerate(axes.flat):
    ax.imshow(V[i, :].reshape(62, 47), cmap="gray")
X_dr = pca.transform(X)
X_dr.shape
(1348, 150)
X_inverse = pca.inverse_transform(X_dr)
X_inverse.shape
(1348, 2914)
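# Rough sketch of the information lost by keeping 150 of 2914 features: the mean
# squared error between the original faces and their reconstructions, alongside
# the fraction of variance retained by the 150 components.
reconstruction_mse = np.mean((X - X_inverse) ** 2)
print(reconstruction_mse)
print(pca.explained_variance_ratio_.sum())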
fig, ax = plt.subplots(2, 10,
                       figsize=(10, 2.5),
                       subplot_kw={"xticks": [], "yticks": []})
for i in range(10):
    ax[0, i].imshow(faces.images[i, :, :], cmap="binary_r")
    ax[1, i].imshow(X_inverse[i].reshape(62, 47), cmap="binary_r")
fig