# Churn classification with k-nearest neighbours.
# Expects a data frame `fdata` to already exist in the workspace, containing
# the columns referenced below.
library(dplyr)
library(class)

# Ratio of the two flight-count columns (presumably last year vs. the year
# before -- confirm against the data dictionary).
fdata$ratio <- fdata$L1Y_Flight_Count / fdata$P1Y_Flight_Count

# Keep rows with more than 6 flights. Rows whose ratio is NA/NaN (e.g. 0 / 0)
# are dropped too, because they would receive an NA class label below.
pre <- filter(fdata, FLIGHT_COUNT > 6, !is.na(ratio))

# Select from the filtered `pre`, not from `fdata`: selecting from `fdata`
# again would silently throw away the row filter applied above.
pre <- select(
  pre,
  c("FFP_TIER", "AVG_INTERVAL", "avg_discount", "EXCHANGE_COUNT", "ratio")
)

# Three-level class label derived from the ratio:
#   ratio < 0.5        -> "已流失"
#   0.5 <= ratio < 0.9 -> "准流失"
#   ratio >= 0.9       -> "未流失"
pre$ratiotype <- as.factor(
  ifelse(pre$ratio < 0.5, "已流失", ifelse(pre$ratio < 0.9, "准流失", "未流失"))
)

# The label is a direct function of `ratio`, so `ratio` is removed from the
# predictors. Dropping by name avoids depending on column positions.
pre$ratio <- NULL
table(pre$ratiotype)
summary(pre)

# Every remaining column except the label is a predictor.
feature_cols <- setdiff(names(pre), "ratiotype")

# class::knn() ranks neighbours by Euclidean distance, so the predictors are
# standardised to stop large-valued columns from dominating the distance.
# NOTE(review): the centring/scaling statistics are computed on all rows,
# including the future test rows; fit them on `train` only if a strict
# hold-out evaluation is required.
pre2 <- scale(pre[, feature_cols])
pre <- data.frame(pre2, pre.ratiotype = pre$ratiotype)
names(pre)

# Training set / test set: random split with probabilities 0.8 / 0.2.
set.seed(12345)
a <- sample(2, nrow(pre), replace = TRUE, prob = c(0.8, 0.2))
train <- pre[a == 1, ]
test <- pre[a == 2, ]
str(train$pre.ratiotype)
table(train$pre.ratiotype)

# k-nearest neighbours with k = 7, predicting the label of each test row.
knn.model <- knn(
  train[, feature_cols],
  test[, feature_cols],
  cl = train$pre.ratiotype,
  k = 7
)
summary(knn.model)

# Confusion matrix (rows: actual label, columns: predicted label) and the
# overall accuracy in percent.
tab <- table(test$pre.ratiotype, knn.model, dnn = c("acctual", "predict"))
sum(diag(tab)) * 100 / sum(tab)
准确率只有53%,请问是哪里的问题呢?
数据结构如下:
对测试集的预测结果如下:
还有一个问题:什么时候需要对自变量进行标准化呢?这里标准化和不标准化我都试了一下,结果没有区别。
请老师们指导!


雷达卡


京公网安备 11010802022788号







