#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time :2021/9/3 10:04
# @Author :A bigfish
# @FileName :maindemo13.py
# @Software :PyCharm
import matplotlib.pyplot as plt
import numpy as np
from pylab import *
# Load the data: read the samples to analyse from a delimited text file.
def loadDataSet(filename, delim='\t'):
    """Read a delimited text file of numbers into a NumPy matrix.

    Every non-blank line is stripped of surrounding whitespace, split on
    ``delim`` and each resulting field is converted with ``float``.

    Args:
        filename: Path of the text file to read.
        delim: Separator between the values on a line; defaults to a tab.

    Returns:
        A ``numpy.matrix`` with one row per data line and one column per
        field.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    # The context manager guarantees the file is closed, even if reading
    # fails part-way through.
    with open(filename) as fr:
        # Blank lines (typically a trailing newline at the end of the file)
        # are skipped; they would otherwise fail in float('').
        stringArr = [line.strip().split(delim) for line in fr if line.strip()]
    dataArr = [list(map(float, fields)) for fields in stringArr]
    # np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
    return np.asmatrix(dataArr)
# PCA analysis function.
def pca(dataset, topNfeat=99999):
    """Project ``dataset`` onto its leading principal components.

    Args:
        dataset: 2-D data with one sample per row and one feature per
            column (``numpy.matrix`` or anything ``np.asmatrix`` accepts).
        topNfeat: Number of principal components to keep.  The large
            default keeps all of them, which is convenient when the
            eigenvalues are inspected/plotted afterwards.

    Returns:
        A tuple ``(lowDataMat, recontMat, eigVals)``:
            lowDataMat: the mean-centred data projected onto the kept
                eigenvectors (one column per kept component).
            recontMat: the data reconstructed from that projection, with
                the column means added back.
            eigVals: all eigenvalues of the covariance matrix, in the
                order returned by ``np.linalg.eig`` (not sorted).

    Raises:
        ValueError: If ``topNfeat`` is negative.
    """
    if topNfeat < 0:
        raise ValueError("topNfeat must be non-negative")
    # Matrix semantics are needed below: ``*`` must be a matrix product.
    dataset = np.asmatrix(dataset)
    # Centre every feature (column) on zero.
    meanVals = np.mean(dataset, axis=0)
    meanRemoved = dataset - meanVals
    # Covariance between the columns (rowvar=0: columns are the variables).
    covMat = np.cov(meanRemoved, rowvar=0)
    # np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
    eigVals, eigVects = np.linalg.eig(np.asmatrix(covMat))
    # argsort is ascending, so walk it backwards to get the largest first.
    eigVaInd = np.argsort(eigVals)
    # Stop index is -(topNfeat + 1) so that exactly topNfeat indices are
    # taken; a stop of -topNfeat would yield one fewer (none for topNfeat=1).
    eigVaInd = eigVaInd[:-(topNfeat + 1):-1]
    # Eigenvectors (columns) belonging to the selected eigenvalues.
    redEigVects = eigVects[:, eigVaInd]
    # Dimensionality reduction: X * P.
    lowDataMat = meanRemoved * redEigVects
    # Reconstruction in the original feature space (optional for callers).
    recontMat = (lowDataMat * redEigVects.T) + meanVals
    return lowDataMat, recontMat, eigVals
# Eigenvalue plotting function.
def plotEig(dataset, numFeat=20):
    """Plot cumulative and per-component variance percentages.

    The values in ``dataset`` are normalised by their total and shown in two
    stacked subplots: the running (cumulative) percentage on top and the
    individual percentage of each component below.

    Args:
        dataset: Sequence of eigenvalues.  They are plotted in the order
            given; this function does not sort them.
            NOTE(review): presumably the caller passes them sorted in
            descending order -- confirm against the call site.
        numFeat: Number of leading values to plot.  It is capped at the
            number of values available.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    # Flatten to 1-D so lists, arrays and 1xN matrices behave the same.
    ratios = np.asarray(dataset).ravel()
    ratios = ratios / np.sum(ratios)
    # Cap numFeat so the x axis and the plotted values have equal length.
    numFeat = min(numFeat, ratios.size)
    percent = ratios[:numFeat] * 100
    # Entry i is the total of components 1..i+1, so the value shown at
    # x = 1 is the share of the first component (not zero).
    cumulative = np.cumsum(percent)
    X = np.linspace(1, numFeat, numFeat)

    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    fig = plt.figure()
    ax = fig.add_subplot(211)
    ax.plot(X, cumulative, 'r-+')
    # Presumably switched to SimHei so the Chinese axis labels can render.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    ax.set_ylabel('累计方差百分比')

    ax2 = fig.add_subplot(212)
    ax2.plot(X, percent, 'b-*')
    ax2.set_xlabel('主成分数')
    ax2.set_ylabel('方差百分比')
    plt.show()
# Plotting function for the original data and the first principal component.
def plotData(OrigData, recData):
    """Scatter-plot two data sets on the same axes.

    The first two columns of each input are used as x and y coordinates.
    ``OrigData`` is drawn as blue triangles and ``recData`` as red circles.

    Args:
        OrigData: 2-D data (``numpy.matrix`` or array-like) with at least
            two columns.
        recData: 2-D data of the same kind, e.g. a reconstruction of
            ``OrigData``.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    # Plain ndarrays give 1-D columns directly and, unlike the matrix-only
    # ``.A`` attribute, work for both np.matrix and ndarray inputs.
    original = np.asarray(OrigData)
    reconstructed = np.asarray(recData)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(original[:, 0], original[:, 1], c='blue', marker='^', s=90)
    ax.scatter(reconstructed[:, 0], reconstructed[:, 1], c='red', marker='o', s=90)
    plt.show()