- 阅读权限
- 255
- 威望
- 0 级
- 论坛币
- 50288 个
- 通用积分
- 83.6306
- 学术水平
- 253 点
- 热心指数
- 300 点
- 信用等级
- 208 点
- 经验
- 41518 点
- 帖子
- 3256
- 精华
- 14
- 在线时间
- 766 小时
- 注册时间
- 2006-5-4
- 最后登录
- 2022-11-6
|
- > # Start from an empty workspace, then run the garbage collector
- > rm(list = ls())
- > gc()
- used (Mb) gc trigger (Mb) max used (Mb)
- Ncells 302204 16.2 597831 32.0 597831 32.0
- Vcells 543603 4.2 22487081 171.6 43286117 330.3
- >
- > # Read the header-less CSV as a matrix, strip the column labels, then transpose it
- > tbl <- unname(as.matrix(read.csv("data/itemsets002.csv", header=FALSE)))
- > itemsets <- t(tbl)
- > print(itemsets)
- [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
- [1,] 1 0 0 1 1 0 1 1 1
- [2,] 1 1 1 1 0 1 0 1 1
- [3,] 0 0 1 0 1 1 1 1 1
- [4,] 0 1 0 1 0 0 0 0 0
- [5,] 1 0 0 0 0 0 0 1 0
- >
- > # One entry per row of `itemsets`; only its length is used further down
- > items <- c(1,2,3,4,5)
- > # Support is counted across the columns of `itemsets` (one column per transaction),
- > # so the 22% fraction is scaled by ncol(), not nrow(). Supports are whole counts,
- > # which makes this threshold equivalent to "at least 2 transactions" here.
- > min_sup <- 0.22*ncol(itemsets)
- >
- > # Accumulators: `f` is handed to Eclat(); `ff` is the global that AddFrequentItemset() appends to
- > f <- NULL
- > ff <- NULL
- > # Driver: prints the input, builds and prints the starting tidset rows, then runs Eclat() on them.
- > # `f` is not a local of this function and nothing here assigns to it, so the value handed back
- > # is whatever `f` holds in the enclosing environment; the mined rows end up in the global `ff`.
- > testEclat <- function(data,base_items,MIN_SUP){
- +   print(data)
- +   seed_rows <- GetFrequentTidSets(data,base_items,MIN_SUP)
- +   print(seed_rows)
- +   Eclat(seed_rows,f,MIN_SUP,parameter=length(base_items))
- +   f
- + }
- >
- > # Build the starting tidset table. Each row is laid out as
- > #   [ one-hot item indicator (length(base_items) columns) | the matching row of `data` ]
- > # and only rows whose support (row sum of `data`) reaches MIN_SUP are kept.
- > #   data       : matrix-like input, one row per item, one column per transaction
- > #   base_items : the items; only its length is used (width of the indicator part)
- > #   MIN_SUP    : minimum support, expressed as a count
- > # Always returns a matrix, including when zero or one row qualifies.
- > GetFrequentTidSets <- function(data,base_items,MIN_SUP){
- +   data <- as.matrix(data)
- +   n_items <- length(base_items)
- +   if(nrow(data) > n_items){
- +     stop("data has more rows than base_items has items")
- +   }
- +   # the row sum is the number of columns (transactions) in which the item occurs
- +   support <- rowSums(data)
- +   indicator <- diag(n_items)[seq_len(nrow(data)), , drop=FALSE]
- +   tidsets <- cbind(indicator, data)
- +   # >= matches the threshold test in Eclat(); drop=FALSE keeps a matrix even when
- +   # a single row survives, so the caller can still rely on nrow()
- +   return(tidsets[support >= MIN_SUP, , drop=FALSE])
- + }
- >
- > # Depth-first Eclat step over the tidset rows in `p`.
- > # Each row is recorded through AddFrequentItemset(), then merged with every later row of `p`;
- > # merged rows whose support reaches MIN_SUP form the next level, which is processed
- > # recursively before moving on to the next row.
- > #   p         : matrix of tidset rows (a bare vector is treated as a single row)
- > #   f         : passed through unchanged to AddFrequentItemset() and the recursive call
- > #   MIN_SUP   : minimum support, expressed as a count
- > #   parameter : number of leading item-indicator columns in each row
- > # Called for its side effect; returns NULL invisibly.
- > Eclat <- function(p,f,MIN_SUP,parameter=NULL){
- +   if(is.null(p)) return(invisible(NULL))
- +   if(is.null(dim(p))) p <- matrix(p, nrow=1)
- +   n_rows <- nrow(p)
- +   # seq_len() yields an empty sequence for zero rows, where seq(0) would give c(1, 0)
- +   for(idx in seq_len(n_rows)){
- +     a <- p[idx,]
- +     AddFrequentItemset(f,a)
- +     # pair row idx with each row that follows it
- +     later <- seq_len(n_rows - idx) + idx
- +     merged <- lapply(later, function(jdx) MergeTidSets(a, p[jdx,], parameter))
- +     keep <- vapply(merged, function(ab) GetSupport(ab,parameter) >= MIN_SUP, logical(1))
- +     # do.call(rbind, list()) is NULL, which IsEmptyTidSets() reports as empty
- +     pa <- do.call(rbind, merged[keep])
- +     if(!IsEmptyTidSets(pa)){
- +       Eclat(pa,f,MIN_SUP,parameter)
- +     }
- +   }
- +   invisible(NULL)
- + }
- >
- > # TRUE when `pa` holds no elements at all (NULL or a zero-length object), FALSE otherwise
- > IsEmptyTidSets <- function(pa){
- +   return(length(pa) == 0)
- + }
- >
- > # Combine two tidset rows of equal layout.
- > # The first `parameter` positions become 1 wherever a + b is non-zero, otherwise 0;
- > # the remaining positions are multiplied element-wise.
- > MergeTidSets <- function(a,b,parameter=NULL){
- +   item_idx <- 1:parameter
- +   tid_idx <- (parameter+1):length(a)
- +   item_part <- ifelse(a[item_idx]+b[item_idx],1,0)
- +   tid_part <- a[tid_idx]*b[tid_idx]
- +   c(item_part, tid_part)
- + }
- >
- > # Append row `p` to the accumulator `ff` found outside this function (via <<-).
- > # The `f` argument is accepted but not used.
- > AddFrequentItemset <- function(f,p){
- +   grown <- rbind(ff,p)
- +   ff <<- grown
- + }
- >
- > # Support of a tidset row: the sum of every position after the first `parameter` ones
- > GetSupport <- function(ab,parameter=NULL){
- +   tid_idx <- (parameter+1):length(ab)
- +   sum(ab[tid_idx])
- + }
- >
- > testEclat(itemsets,items,min_sup) # prints the input matrix, the starting tidset rows, then the value returned from f
- [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
- [1,] 1 0 0 1 1 0 1 1 1
- [2,] 1 1 1 1 0 1 0 1 1
- [3,] 0 0 1 0 1 1 1 1 1
- [4,] 0 1 0 1 0 0 0 0 0
- [5,] 1 0 0 0 0 0 0 1 0
- [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
- [1,] 1 0 0 0 0 1 0 0 1 1 0 1 1 1
- [2,] 0 1 0 0 0 1 1 1 1 0 1 0 1 1
- [3,] 0 0 1 0 0 0 0 1 0 1 1 1 1 1
- [4,] 0 0 0 1 0 0 1 0 1 0 0 0 0 0
- [5,] 0 0 0 0 1 1 0 0 0 0 0 0 1 0
- NULL
- > rownames(ff) <- NULL # clear any row labels picked up while ff was grown with rbind()
- > print(ff) # one row per recorded itemset: 5 item-indicator columns followed by 9 transaction columns
- [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
- [1,] 1 0 0 0 0 1 0 0 1 1 0 1 1 1
- [2,] 1 1 0 0 0 1 0 0 1 0 0 0 1 1
- [3,] 1 1 1 0 0 0 0 0 0 0 0 0 1 1
- [4,] 1 1 0 0 1 1 0 0 0 0 0 0 1 0
- [5,] 1 0 1 0 0 0 0 0 0 1 0 1 1 1
- [6,] 1 0 0 0 1 1 0 0 0 0 0 0 1 0
- [7,] 0 1 0 0 0 1 1 1 1 0 1 0 1 1
- [8,] 0 1 1 0 0 0 0 1 0 0 1 0 1 1
- [9,] 0 1 0 1 0 0 1 0 1 0 0 0 0 0
- [10,] 0 1 0 0 1 1 0 0 0 0 0 0 1 0
- [11,] 0 0 1 0 0 0 0 1 0 1 1 1 1 1
- [12,] 0 0 0 1 0 0 1 0 1 0 0 0 0 0
- [13,] 0 0 0 0 1 1 0 0 0 0 0 0 1 0
- >
复制代码
|
|