Linear Regression
#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import linear_model
#Load Train and Test datasets
#Identify feature and response variable(s);
#values must be numeric numpy arrays
x_train = input_variables_values_training_datasets
y_train = target_variables_values_training_datasets
x_test = input_variables_values_test_datasets
#Create linear regression object
linear = linear_model.LinearRegression()
#Train the model using the training sets and check score
linear.fit(x_train, y_train)
linear.score(x_train, y_train)
#Equation coefficient and intercept
print('Coefficient: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)
#Predict Output
predicted = linear.predict(x_test)
#Load Train and Test datasets
#Identify feature and response variable(s) and
#values must be numeric and numpy arrays
x_train <- input_variables_values_training_datasets
y_train <- target_variables_values_training_datasets
x_test <- input_variables_values_test_datasets
x <- cbind(x_train,y_train)
#Train the model using the training sets and check score
linear <- lm(y_train ~ ., data = x)
summary(linear)
#Predict Output
predicted <- predict(linear, x_test)
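A minimal runnable version of the Python snippet in this section, with synthetic data standing in for the placeholder variables (the data-generating lines are illustrative assumptions, not part of the original):

import numpy as np
from sklearn import linear_model

#Synthetic data: y = 3x + 4 plus noise (illustrative only)
rng = np.random.RandomState(0)
x_train = rng.rand(100, 1)
y_train = 3 * x_train.ravel() + 4 + 0.1 * rng.randn(100)
x_test = rng.rand(10, 1)

linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)
print('R^2 on training data:', linear.score(x_train, y_train))
print('Coefficient:', linear.coef_)     #should be close to 3
print('Intercept:', linear.intercept_)  #should be close to 4
predicted = linear.predict(x_test)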
Logistic Regression
#Import Library
from sklearn.linear_model import LogisticRegression
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create logistic regression object
model = LogisticRegression()
#Train the model using the training sets
#and check score
model.fit(X, y)
model.score(X, y)
#Equation coefficient and intercept
print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)
#Predict Output
predicted = model.predict(x_test)
x <- cbind(x_train, y_train)
#Train the model using the training sets and check score
logistic <- glm(y_train ~ ., data = x, family = 'binomial')
summary(logistic)
#Predict Output
predicted <- predict(logistic, x_test)
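A self-contained sketch of the same Python flow on synthetic binary-labelled data (the X/y construction is an assumption for illustration):

import numpy as np
from sklearn.linear_model import LogisticRegression

#Synthetic binary classification data (illustrative only)
rng = np.random.RandomState(0)
X = rng.randn(100, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
x_test = rng.randn(5, 2)

model = LogisticRegression()
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)
predicted = model.predict(x_test)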
Decision Tree
#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import tree
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create tree object
model = tree.DecisionTreeClassifier(criterion='gini')
#for classification; the criterion can be set to
#gini or entropy (information gain), by default it is gini
#model = tree.DecisionTreeRegressor() for regression
#Train the model using the training sets and check score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(rpart)
x <- cbind(x_train, y_train)
#grow tree
fit <- rpart(y_train ~ ., data = x, method = "class")
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
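A runnable sketch of the decision-tree snippet; the built-in iris dataset is used as an assumed stand-in for the placeholder X and y:

from sklearn import tree
from sklearn.datasets import load_iris

#iris stands in for the placeholder training data (illustrative choice)
X, y = load_iris(return_X_y=True)

model = tree.DecisionTreeClassifier(criterion='gini')  #or criterion='entropy'
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])  #predict on a few rows in lieu of a test set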
Support Vector Machine (SVM)
#Import Library
from sklearn import svm
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create SVM classification object
model = svm.SVC()
#there are various options associated with it;
#this is a simple setup for classification
#Train the model using the training sets and check score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(e1071)
x <- cbind(x_train, y_train)
#Fitting model
fit <- svm(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
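A self-contained sketch of the SVM snippet, again assuming iris as the stand-in data:

from sklearn import svm
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  #illustrative stand-in data

model = svm.SVC(kernel='rbf')  #the default kernel; many other options exist
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])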
Naive Bayes
#Import Library
from sklearn.naive_bayes import GaussianNB
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create Gaussian Naive Bayes object
model = GaussianNB()
#there are other distributions for multinomial classes,
#like Bernoulli Naive Bayes
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(e1071)
x <- cbind(x_train, y_train)
#Fitting model
fit <- naiveBayes(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
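A minimal runnable version, with iris assumed as the example data:

from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  #illustrative stand-in data

model = GaussianNB()
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])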
k-Nearest Neighbors (kNN)
#Import Library
from sklearn.neighbors import KNeighborsClassifier
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create KNeighbors classifier object
model = KNeighborsClassifier(n_neighbors=6)
#default value for n_neighbors is 5
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(class)
#knn() from the class package takes the training and test
#matrices plus a factor of training labels directly
#(there is no formula interface and no separate fit step)
predicted <- knn(train = x_train, test = x_test, cl = y_train, k = 5)
summary(predicted)
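A runnable Python counterpart, assuming iris as the data:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  #illustrative stand-in data

model = KNeighborsClassifier(n_neighbors=6)  #default n_neighbors is 5
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])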
K-Means (Hard Clustering)
#Import Library
from sklearn.cluster import KMeans
#Assume you have X (attributes) for the training set
#and x_test (attributes) for the test set
#Create KMeans object
k_means = KMeans(n_clusters=3, random_state=0)
#Train the model using the training sets and check score
k_means.fit(X)
#Predict Output
predicted = k_means.predict(x_test)
#Import Library
library(cluster)
fit <- kmeans(X, 3)
#3 cluster solution
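A self-contained sketch with synthetic clustered points (the three group centres are illustrative assumptions):

import numpy as np
from sklearn.cluster import KMeans

#Synthetic data with three loose groups (illustrative only)
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2) + c for c in ([0, 0], [5, 5], [0, 5])])

k_means = KMeans(n_clusters=3, random_state=0, n_init=10)
k_means.fit(X)
print('Cluster centers:\n', k_means.cluster_centers_)
predicted = k_means.predict(X[:5])  #cluster labels for a few points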
Random Forest
#Import Library
from sklearn.ensemble import RandomForestClassifier
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create Random Forest object
model = RandomForestClassifier()
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(randomForest)
x <- cbind(x_train, y_train)
#Fitting model
fit <- randomForest(y_train ~ ., data = x, ntree = 500)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
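A runnable sketch of the random-forest snippet, with iris assumed as the data:

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  #illustrative stand-in data

model = RandomForestClassifier(n_estimators=500, random_state=0)
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])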
Dimensionality Reduction
#Import Library
from sklearn import decomposition
#Assume you have training and test data sets as
#train and test
#Create PCA object
pca = decomposition.PCA(n_components=k)
#default value of n_components is min(n_samples, n_features)
#For Factor analysis
#fa = decomposition.FactorAnalysis()
#Reduce the dimension of the training dataset using PCA
train_reduced = pca.fit_transform(train)
#Reduce the dimension of the test dataset
test_reduced = pca.transform(test)
#Import Library
library(stats)
pca <- princomp(train, cor = TRUE)
train_reduced <- predict(pca,train)
test_reduced <- predict(pca,test)
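A runnable PCA sketch; using iris and keeping two components are illustrative choices:

from sklearn import decomposition
from sklearn.datasets import load_iris

train, _ = load_iris(return_X_y=True)  #stand-in for the train set

pca = decomposition.PCA(n_components=2)  #keep the top 2 components
train_reduced = pca.fit_transform(train)
print('Explained variance ratio:', pca.explained_variance_ratio_)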
GBDT (Gradient Boosting Decision Tree)
#Import Library
from sklearn.ensemble import GradientBoostingClassifier
#Assume you have X (predictors) and y (target) for the
#training set, and x_test (predictors) for the test set
#Create Gradient Boosting Classifier object
model = GradientBoostingClassifier(n_estimators=100,
    learning_rate=1.0, max_depth=1, random_state=0)
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)
#Import Library
library(caret)
x <- cbind(x_train, y_train)
#Fitting model
fitControl <- trainControl(method = "repeatedcv", number = 4, repeats = 4)
fit <- train(y_train ~ ., data = x, method = "gbm",
             trControl = fitControl, verbose = FALSE)
#Predict Output
predicted <- predict(fit, x_test, type = "prob")[,2]
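A runnable version of the Python snippet, with iris assumed as the data:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  #illustrative stand-in data

model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                   max_depth=1, random_state=0)
model.fit(X, y)
print('Training accuracy:', model.score(X, y))
predicted = model.predict(X[:5])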