- 阅读权限
- 255
- 威望
- 0 级
- 论坛币
- 18999 个
- 通用积分
- 1029.7754
- 学术水平
- 146 点
- 热心指数
- 166 点
- 信用等级
- 135 点
- 经验
- 36357 点
- 帖子
- 541
- 精华
- 0
- 在线时间
- 887 小时
- 注册时间
- 2015-9-25
- 最后登录
- 2025-12-4
|
# Hand-written k-means clustering for two-dimensional points.
#
# Arguments:
#   x, y         Numeric vectors of equal, non-zero length holding the point
#                coordinates. All values must be finite.
#   nclus        Number of clusters (a single number >= 1).
#   random.seed  Seed handed to set.seed() before the initial centres are
#                drawn. Note that this resets the session-wide RNG state.
#
# Returns:
#   A data.frame with columns `xval`, `yval` (the input coordinates) and
#   `clus` (the integer index, 1..nclus, of the cluster each point ends in).
kclus <- function(x, y, nclus, random.seed = 123) {
  stopifnot(
    is.numeric(x), is.numeric(y),
    length(x) > 0, length(x) == length(y),
    all(is.finite(x)), all(is.finite(y)),
    is.numeric(nclus), length(nclus) == 1, nclus >= 1
  )

  set.seed(random.seed)
  # Initial centres are drawn uniformly inside the bounding box of the data.
  xcen <- runif(n = nclus, min = min(x), max = max(x))
  ycen <- runif(n = nclus, min = min(y), max = max(y))
  result <- data.frame(xval = x, yval = y, clus = NA)

  repeat {
    # Assignment step: each point joins its nearest centre (Euclidean
    # distance); which.min() resolves ties in favour of the lowest index.
    result$clus <- vapply(
      seq_along(x),
      function(i) which.min(sqrt((x[i] - xcen)^2 + (y[i] - ycen)^2)),
      integer(1)
    )

    xcen_old <- xcen
    ycen_old <- ycen

    # Update step: move every centre to the mean of its members. A cluster
    # that attracted no points keeps its previous centre; taking the mean of
    # an empty set would yield NaN and the cluster could never recover.
    for (k in seq_len(nclus)) {
      members <- result$clus == k
      if (any(members)) {
        xcen[k] <- mean(x[members])
        ycen[k] <- mean(y[members])
      }
    }

    # Converged once a full pass leaves every centre exactly where it was.
    if (identical(xcen_old, xcen) && identical(ycen_old, ycen)) {
      break
    }
  }

  result
}
# Demo data: two Gaussian clouds of 50 points each (sd 0.3), one centred at
# the origin and one centred at (1, 1).
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")

# Built-in implementation.
cl <- kmeans(x, centers = 2)
cl$centers

# Hand-written implementation, followed by the per-cluster coordinate means.
cluster <- kclus(x[, "x"], x[, "y"], nclus = 2)
cl2 <- aggregate(. ~ clus, data = cluster, FUN = mean)
cl2

# Scatter plot coloured by the built-in clustering, with the centres found
# by both implementations drawn on top.
plot(x, col = cl$cluster)
points(cl$centers, col = "blue", pch = 8, cex = 2)
points(cl2$xval, cl2$yval, pch = 16, col = "green", cex = 1)
legend(
  "topleft",
  legend = c("内置函数", "自编函数"),
  pch = c(8, 16),
  col = c("blue", "green")
)
复制代码
|
-
总评分: 论坛币 + 30
学术水平 + 7
热心指数 + 7
信用等级 + 7
查看全部评分
|