# Balanced under-sampling ensemble.
#
# The training data is split by FLAG (1 = "fail" rows, 0 = "good" rows, per
# the variable names used in this script). The good rows are partitioned,
# without replacement, into disjoint chunks of `failNum` rows. Each chunk is
# combined with ALL fail rows to form one balanced training set, one random
# forest is fitted per training set, and the per-model test predictions are
# averaged.
#
# Outputs (shapes unchanged):
#   predTests : nrow(inputdataTest) x gfRate; columns 1..(gfRate - 1) hold the
#               per-model predictions, column gfRate holds the true test FLAG.
#   predlast  : nrow(inputdataTest) x gfRate; column 1 = true test FLAG,
#               column 2 = mean of the model predictions; other columns 0.
trainData4F <- inputdataTrain[which(inputdataTrain$FLAG == 1), ]
trainData4G <- inputdataTrain[which(inputdataTrain$FLAG == 0), ]
failNum <- nrow(trainData4F)
goodNum <- nrow(trainData4G)

if (failNum < 1) {
  stop("No rows with FLAG == 1 in inputdataTrain; cannot balance classes.",
       call. = FALSE)
}

# Number of disjoint good-row chunks of size failNum that are available.
gfRate <- floor(goodNum / failNum)
if (gfRate < 2) {
  stop("Need at least twice as many FLAG == 0 rows as FLAG == 1 rows.",
       call. = FALSE)
}

# The last column of predTests is reserved for the true label, so only
# gfRate - 1 models are trained.
nModels <- gfRate - 1

predTests <- matrix(0, nrow(inputdataTest), gfRate)
predlast <- matrix(0, nrow(inputdataTest), gfRate)

# Row numbers of trainData4G that have not been used by any model yet.
rowIndexs <- seq_len(goodNum)

set.seed(50)
for (i in seq_len(nModels)) {
  # idxtemp holds POSITIONS within the remaining pool `rowIndexs`.
  idxtemp <- sample.int(length(rowIndexs), failNum, replace = FALSE)

  # Balanced training set: a fresh chunk of good rows plus every fail row.
  train <- rbind(trainData4G[rowIndexs[idxtemp], ], trainData4F)

  # Alternative model kept for reference (stepwise logistic regression);
  # note that `rfpred` is not defined in this section.
  # glm.model <- glm(FLAG ~ ., data = train, family = binomial(logit))
  # stepModel <- stepAIC(glm.model, direction = "both")
  # rfpred[, i] <- predict(stepModel, newdata = inputdataTest,
  #                        type = "response")

  # NOTE(review): `distribution` looks like a gbm argument rather than a
  # randomForest one; it is passed through unchanged here -- confirm whether
  # it can be dropped.
  fit <- randomForest(FLAG ~ ., data = train, distribution = "bernoulli",
                      importance = TRUE)

  # NOTE(review): the "- 1" presumably maps factor codes 1/2 back to 0/1,
  # which assumes FLAG is a two-level factor -- confirm against the data prep.
  predTests[, i] <-
    as.numeric(predict(fit, newdata = inputdataTest, type = "response")) - 1

  # Drop the rows just used (by position) so the chunks stay disjoint.
  rowIndexs <- rowIndexs[-idxtemp]
}

# Keep the true label next to the predictions for later inspection.
predTests[, gfRate] <- inputdataTest$FLAG
predlast[, 1] <- inputdataTest$FLAG

# Average ONLY the model columns; the label column must not leak into the
# ensemble score.
predlast[, 2] <- rowMeans(predTests[, seq_len(nModels), drop = FALSE])
#############Train the model for whole data set#################


# [web-page footer residue, not part of the script] Radar card



# [web-page footer residue, not part of the script] Beijing Public Network Security Filing No. 11010802022788







