import numpy as np
from sklearn import mixture
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift
from sklearn.metrics import homogeneity_completeness_v_measure
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster
from matplotlib import pyplot as plt

# generate two Gaussian clusters of 100 points each, plus their true labels:
np.random.seed(4711)  # for repeatability
c1 = np.random.multivariate_normal([10, 0], [[3, 1], [1, 4]], size=[100,])
l1 = np.zeros(100)
l2 = np.ones(100)
c2 = np.random.multivariate_normal([0, 10], [[3, 1], [1, 4]], size=[100,])
print(c1.shape)
# add noise:
np.random.seed(1)  # for repeatability
noise1x = np.random.normal(0, 2, 100)
noise1y = np.random.normal(0, 8, 100)
noise2 = np.random.normal(0, 8, 100)
c1[:, 0] += noise1x
c1[:, 1] += noise1y
c2[:, 1] += noise2
# plot the two classes with their true labels
fig = plt.figure(figsize=(20, 15))
ax = fig.add_subplot(111)
ax.set_xlabel('x', fontsize=30)
ax.set_ylabel('y', fontsize=30)
fig.suptitle('classes', fontsize=30)
labels = np.concatenate((l1, l2),)
X = np.concatenate((c1, c2),)
pp1 = ax.scatter(c1[:, 0], c1[:, 1], s=50, color='r')
pp2 = ax.scatter(c2[:, 0], c2[:, 1], s=50, color='g')
ax.legend((pp1, pp2), ('class 1', 'class 2'), fontsize=35)
fig.savefig('classes.png')
# start a 2x2 comparison figure, one panel per algorithm
fig.clf()  # reset the previous figure
fig, ((axis1, axis2), (axis3, axis4)) = plt.subplots(2, 2, sharex='col', sharey='row')

# k-means
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
pred_kmeans = kmeans.labels_
print('kmeans:', np.unique(kmeans.labels_))
print('kmeans:', homogeneity_completeness_v_measure(labels, pred_kmeans))
# plot points with cluster-dependent colors
axis1.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='prism')
axis1.set_ylabel('y', fontsize=40)
axis1.set_title('k-means', fontsize=20)
# mean-shift
ms = MeanShift(bandwidth=7)
ms.fit(X)
pred_ms = ms.labels_
axis2.scatter(X[:, 0], X[:, 1], c=pred_ms, cmap='prism')
axis2.set_title('mean-shift', fontsize=20)
print('ms:', homogeneity_completeness_v_measure(labels, pred_ms))
print('ms:', np.unique(ms.labels_))
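# Optional alternative (a sketch, not part of the original script): instead of
# hard-coding bandwidth=7, scikit-learn can estimate a bandwidth from the data.
# The quantile value below is an assumption and would need tuning for this data set.
#   from sklearn.cluster import estimate_bandwidth
#   bw = estimate_bandwidth(X, quantile=0.2)
#   ms = MeanShift(bandwidth=bw)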
# gaussian mixture (mixture.GMM in old scikit-learn releases; GaussianMixture since 0.18)
g = mixture.GaussianMixture(n_components=2)
g.fit(X)
print(g.means_)
pred_gmm = g.predict(X)
print('gmm:', homogeneity_completeness_v_measure(labels, pred_gmm))
axis3.scatter(X[:, 0], X[:, 1], c=pred_gmm, cmap='prism')
axis3.set_xlabel('x', fontsize=40)
axis3.set_ylabel('y', fontsize=40)
axis3.set_title('gaussian mixture', fontsize=20)
# hierarchical (Ward linkage)
# generate the linkage matrix
Z = linkage(X, 'ward')
max_d = 20
pred_h = fcluster(Z, max_d, criterion='distance')  # cut the tree at distance max_d
print('clusters:', np.unique(pred_h))
k = 2
pred_h_k = fcluster(Z, k, criterion='maxclust')  # alternatively, ask for exactly k clusters
print('h:', homogeneity_completeness_v_measure(labels, pred_h))
axis4.scatter(X[:, 0], X[:, 1], c=pred_h, cmap='prism')
axis4.set_xlabel('x', fontsize=40)
axis4.set_title('hierarchical ward', fontsize=20)
fig.set_size_inches(18.5, 10.5)
fig.savefig('comp_clustering.png', dpi=100)
fig.clf()  # reset the figure before drawing the dendrogram
fig = plt.figure(figsize=(20, 15))
plt.title('Hierarchical Clustering Dendrogram', fontsize=30)
plt.xlabel('data point index (or cluster index)', fontsize=30)
plt.ylabel('distance (ward)', fontsize=30)
dendrogram(
    Z,
    truncate_mode='lastp',  # show only the last p merged clusters
    p=12,
    leaf_rotation=90.,
    leaf_font_size=12.,
    show_contracted=True,  # hint at the distribution inside truncated branches
)
fig.savefig('dendrogram.png')
# measures: compare each prediction with the true labels
res = homogeneity_completeness_v_measure(labels, pred_kmeans)
print('kmeans measures, homogeneity:', res[0], ' completeness:', res[1], ' v-measure:', res[2],
      ' silhouette score:', silhouette_score(X, pred_kmeans))
res = homogeneity_completeness_v_measure(labels, pred_ms)
print('mean-shift measures, homogeneity:', res[0], ' completeness:', res[1], ' v-measure:', res[2],
      ' silhouette score:', silhouette_score(X, pred_ms))
res = homogeneity_completeness_v_measure(labels, pred_gmm)
print('gaussian mixture model measures, homogeneity:', res[0], ' completeness:', res[1], ' v-measure:', res[2],
      ' silhouette score:', silhouette_score(X, pred_gmm))
res = homogeneity_completeness_v_measure(labels, pred_h)
print('hierarchical (ward) measures, homogeneity:', res[0], ' completeness:', res[1], ' v-measure:', res[2],
      ' silhouette score:', silhouette_score(X, pred_h))
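One more external measure that could be appended here, as a minimal sketch not in the original script: scikit-learn's adjusted Rand index, which also compares a predicted partition against the true labels (it reuses the labels and pred_* variables defined above).

from sklearn.metrics import adjusted_rand_score
# adjusted Rand index: 1.0 means a perfect match with the true labels, ~0 means random
for name, pred in [('kmeans', pred_kmeans), ('mean-shift', pred_ms),
                   ('gmm', pred_gmm), ('hierarchical', pred_h)]:
    print(name, 'ARI:', adjusted_rand_score(labels, pred))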