import math
import random
import matplotlib.pyplot as plt
from numpy import *
n_components = 2
def f1(x, period=4):
    # periodic sawtooth-like source signal
    return 0.5 * (x - math.floor(x / period) * period)
def create_data():
    # data number
    n = 500
    # data time
    T = [0.1 * xi for xi in range(0, n)]
    # source
    S = array([[sin(xi) for xi in T], [f1(xi) for xi in T]], float32)
    # mix matrix
    A = array([[0.8, 0.2], [-0.3, -0.7]], float32)
    return T, S, dot(A, S)
def whiten(X):
    # zero mean
    X_mean = X.mean(axis=-1)
    X -= X_mean[:, newaxis]
    # whiten
    A = dot(X, X.transpose())
    D, E = linalg.eig(A)
    D2 = linalg.inv(array([[D[0], 0.0], [0.0, D[1]]], float32))
    D2[0, 0] = sqrt(D2[0, 0])
    D2[1, 1] = sqrt(D2[1, 1])
    V = dot(D2, E.transpose())
    return dot(V, X), V
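The `do_fastica` function below relies on two helpers, `_logcosh` and `do_decorrelation`, whose definitions are not shown in this section. A minimal sketch of what they typically look like in a FastICA implementation (the logcosh contrast function and symmetric decorrelation, following the convention used by sklearn's FastICA) might be:

def _logcosh(wx, alpha=1.0):
    # contrast function g(u) = tanh(alpha * u), plus the row-wise mean of its
    # derivative g'(u) = alpha * (1 - tanh(alpha * u) ** 2)
    gx = tanh(alpha * wx)
    g_x = alpha * (1.0 - gx ** 2)
    return gx, g_x.mean(axis=-1)

def do_decorrelation(W):
    # symmetric decorrelation: W <- (W W^T)^(-1/2) W keeps the rows of W orthonormal
    s, u = linalg.eigh(dot(W, W.T))
    return dot(dot(u * (1.0 / sqrt(s)), u.T), W)

These are only one plausible version of the helpers; any implementation with the same signatures (taking `dot(W, X)` and the unmixing matrix `W`, respectively) would slot into the code below.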
def do_fastica(X):
    n, m = X.shape
    p = float(m)
    g = _logcosh
    # black magic
    X *= sqrt(X.shape[1])
    # create w
    W = ones((n, n), float32)
    for i in range(n):
        for j in range(i):
            W[i, j] = random.random()
    # compute W
    maxIter = 200
    for ii in range(maxIter):
        gwtx, g_wtx = g(dot(W, X))
        W1 = do_decorrelation(dot(gwtx, X.T) / p - g_wtx[:, newaxis] * W)
        lim = max(abs(abs(diag(dot(W1, W.T))) - 1))
        W = W1
        if lim < 0.0001:
            break
    return W
def show_data(T, S):
    plt.plot(T, [S[0, i] for i in range(S.shape[1])], marker="*")
    plt.plot(T, [S[1, i] for i in range(S.shape[1])], marker="o")
    plt.show()
def main():
    T, S, D = create_data()
    Dwhiten, K = whiten(D)
    W = do_fastica(Dwhiten)
    # Sr: reconstructed source
    Sr = dot(dot(W, K), D)
    show_data(T, D)   # mixed signals
    show_data(T, S)   # original sources
    show_data(T, Sr)  # recovered sources

if __name__ == "__main__":
    main()
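As a quick sanity check on the `whiten` step (a sketch that assumes the functions in the listing above are already defined, e.g. in an interactive session), the whitened mixture should have an identity second-moment matrix:

T, S, D = create_data()
Xw, V = whiten(D.copy())
# V X (V X)^T should be (numerically) the 2x2 identity matrix
print(dot(Xw, Xw.T))

In other words, whitening removes the correlations between the mixed channels and normalizes their scale, which is exactly the starting point FastICA assumes before it searches for a rotation that makes the components independent.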
## Implementing PCA in Python
import numpy as np
def pca(X, k):
    # k is the number of components you want
    n_samples, n_features = X.shape
    # mean of each feature
    mean = np.array([np.mean(X[:, i]) for i in range(n_features)])
    # normalization: center the data
    norm_X = X - mean
    # scatter matrix
    scatter_matrix = np.dot(np.transpose(norm_X), norm_X)
    # calculate the eigenvectors and eigenvalues
    eig_val, eig_vec = np.linalg.eig(scatter_matrix)
    eig_pairs = [(np.abs(eig_val[i]), eig_vec[:, i]) for i in range(n_features)]
    # sort eig_vec based on eig_val from highest to lowest
    eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
    # select the top k eig_vec
    feature = np.array([ele[1] for ele in eig_pairs[:k]])
    # get new data: project onto the top-k eigenvectors
    data = np.dot(norm_X, np.transpose(feature))
    return data
X = np.array([[-1, 1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
print(pca(X,1))
Let's compare sklearn's PCA with our PCA:
## PCA with sklearn
from sklearn.decomposition import PCA
import numpy as np
X = np.array([[-1, 1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca=PCA(n_components=1)
pca.fit(X)
print(pca.transform(X))
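Both snippets project the same data onto one principal direction, so their outputs should agree up to sign: an eigenvector's orientation is arbitrary, and sklearn may pick the opposite sign. A self-contained sketch of that check (it re-derives the hand-rolled projection inline, so it does not depend on the `pca` name, which the sklearn snippet above rebinds to an estimator):

import numpy as np
from sklearn.decomposition import PCA

X = np.array([[-1, 1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
# hand-rolled projection: center the data, then project onto the top eigenvector of the scatter matrix
norm_X = X - X.mean(axis=0)
eig_val, eig_vec = np.linalg.eig(norm_X.T @ norm_X)
ours = norm_X @ eig_vec[:, [np.argmax(eig_val)]]
# sklearn's projection
theirs = PCA(n_components=1).fit_transform(X)
# the two agree up to a sign flip
print(np.allclose(np.abs(ours), np.abs(theirs)))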
Singular Value Decomposition (SVD) is one of the most widely used algorithms in machine learning, and one of the cornerstones you cannot avoid when studying machine learning algorithms. SVD is mainly used for the matrix factorization step in dimensionality reduction, as well as in recommender systems, natural language processing, and computer vision. Put simply, SVD decomposes a linear transformation into simpler transformations: rotations and a stretch (scaling).
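As a small numerical sketch with NumPy (reusing the 2x2 mixing matrix from the ICA example above, an arbitrary choice), the three SVD factors can be computed and multiplied back together to recover the original matrix:

import numpy as np

A = np.array([[0.8, 0.2],
              [-0.3, -0.7]])
# U and Vt are orthogonal matrices (rotations/reflections); s holds the singular values (the stretch)
U, s, Vt = np.linalg.svd(A)
print(U)
print(s)
print(Vt)
# the factors reconstruct A: U @ diag(s) @ Vt == A, up to floating-point error
print(np.allclose(U @ np.diag(s) @ Vt, A))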