- 阅读权限
- 255
- 威望
- 0 级
- 论坛币
- 1234 个
- 通用积分
- 0.4800
- 学术水平
- 12 点
- 热心指数
- 13 点
- 信用等级
- 9 点
- 经验
- 2381 点
- 帖子
- 119
- 精华
- 0
- 在线时间
- 159 小时
- 注册时间
- 2012-4-6
- 最后登录
- 2021-1-22
|
自己写了一个 供参考
#' Apriori join step: build candidate (k+1)-itemsets from frequent k-itemsets.
#'
#' Two k-itemsets are merged only when their first k-1 items are equal
#' (compared position by position, so itemsets must share a consistent item
#' order). A merged itemset is kept only if `subpart()` confirms that its
#' remaining k-subsets are also present in `old_list`.
#'
#' @param old_list List (or vector) of frequent k-itemsets, all of length k.
#' @return A list of unique candidate (k+1)-itemsets, or `NULL` when no
#'   candidate can be formed (including empty or single-element input).
nextlist <- function(old_list) {
  len <- length(old_list)
  # Fewer than two itemsets cannot be joined; this also protects the
  # `old_list[[1]]` lookup below from an empty input.
  if (len < 2) {
    return(NULL)
  }

  stgnum <- length(old_list[[1]])
  # Positions of the shared prefix; empty for 1-itemsets, in which case
  # every pair is eligible for merging.
  prefix <- seq_len(stgnum - 1)
  new_list <- list()

  for (i in seq_len(len - 1)) {
    for (j in (i + 1):len) {
      same_prefix <- all(old_list[[i]][prefix] == old_list[[j]][prefix])
      if (!isTRUE(same_prefix)) {
        next
      }
      temp <- unique(c(old_list[[i]], old_list[[j]]))
      # Identical itemsets collapse to k items and are not valid candidates.
      if (length(temp) != stgnum + 1) {
        next
      }
      # Prune candidates that contain a k-subset missing from old_list.
      if (subpart(temp, old_list) == 1) {
        new_list[[length(new_list) + 1]] <- temp
      }
    }
  }

  if (length(new_list) == 0) {
    return(NULL)
  }
  unique(new_list)
}
-
#' Check that the (k-1)-subsets of a merged itemset are present in a list.
#'
#' Only the subsets obtained by dropping one of the first `length(vec) - 2`
#' positions are looked up. When `vec` was built by `nextlist()` as
#' `unique(c(a, b))`, dropping either of the last two positions reproduces
#' `a` or `b`, which are already members of `orig_list`.
#'
#' @param vec Itemset (vector) to validate.
#' @param orig_list List of itemsets to search, as accepted by `ifinlist()`.
#' @return `1` if every checked subset is found (or `vec` has at most two
#'   items), otherwise `0`.
subpart <- function(vec, orig_list) {
  len <- length(vec)
  # Itemsets of size <= 2 have no subsets that need checking here.
  if (len <= 2) {
    return(1)
  }
  for (i in seq_len(len - 2)) {
    # Stop at the first subset that is missing from orig_list.
    if (ifinlist(vec[-i], orig_list) == 0) {
      return(0)
    }
  }
  1
}
#' Test whether a vector occurs as an element of a list.
#'
#' An element matches when it has the same length as `vec` and is equal to it
#' position by position. Requiring equal lengths avoids false positives caused
#' by recycling when a stored itemset is longer than `vec`.
#'
#' @param vec Vector to look for.
#' @param orig_list List (or vector) of candidate elements; may be empty.
#' @return `1` if a matching element exists, otherwise `0`.
ifinlist <- function(vec, orig_list) {
  len <- length(vec)
  for (candidate in orig_list) {
    # isTRUE() treats an NA comparison as "no match" instead of failing.
    if (length(candidate) == len && isTRUE(all(candidate == vec))) {
      return(1)
    }
  }
  0
}
#' Extract the columns of `Base` named by an itemset.
#'
#' The selected columns are bound side by side in the order given by `item`
#' and converted with `as.data.frame()`.
#'
#' @param item Vector or list of column names of `Base`.
#' @param Base Data frame whose `names()` contain the requested items.
#' @return A data frame with one column per selected item, or `0` (after
#'   printing a notice) when `item` is empty.
SelecteData <- function(item, Base) {
  if (length(item) == 0) {
    print("warning:there's no selected data")
    return(0)
  }

  columns <- lapply(seq_along(item), function(i) {
    tag <- match(item[[i]], names(Base))
    # Fail with the offending name rather than an opaque indexing error.
    if (anyNA(tag)) {
      stop(
        "item not found in Base: ",
        paste(item[[i]][is.na(tag)], collapse = ", "),
        call. = FALSE
      )
    }
    Base[, tag]
  })

  # Bind all columns in one call instead of growing a matrix in a loop.
  as.data.frame(do.call(cbind, columns))
}
-
-
#' Level-wise Apriori search using a "sum of row minima" support measure.
#'
#' At each level every candidate itemset is scored as the sum, over the rows
#' of `Base`, of the minimum value across the itemset's columns. Itemsets whose
#' score reaches `threshold_min` are recorded and passed to `nextlist()` to
#' build the next level's candidates.
#'
#' @param orig_list Initial candidate itemsets (column names of `Base`).
#' @param Base Data frame with one column per item.
#' @param threshold_min Minimum support an itemset must reach to be kept.
#' @return A list with one matrix per level; each row holds an itemset
#'   followed by its support in a column named `list2_support`. `NULL` when
#'   no level produced a frequent itemset.
minApriori <- function(orig_list, Base, threshold_min) {
  varnum <- dim(Base)[2]
  # list1: candidates of the current level (replaced on every iteration).
  # list2: the members of list1 that reach the threshold.
  list1 <- orig_list
  stginfo <- list()
  stgnum <- 1

  while (length(list1) >= 1 && stgnum < varnum) {
    list2 <- list()
    list2_support <- c()
    jishu <- 1
    for (i in list1) {
      stgnum <- length(i)
      temp_data <- SelecteData(i, Base)
      temp_support <- sum(apply(temp_data, 1, min))
      if (temp_support >= threshold_min) {
        list2[[length(list2) + 1]] <- i
        list2_support <- c(list2_support, temp_support)
      }
      # Progress output: running counter of processed candidates.
      jishu <- jishu + 1
      print(jishu)
    }

    # No itemset reached the threshold at this level: the search is done.
    # Without this guard, matrix(unlist(NULL), ...) below would error.
    if (length(list2) == 0) {
      break
    }

    list3 <- matrix(unlist(list2), ncol = stgnum, byrow = TRUE)
    # Keep the variable name: cbind() uses it as the support column's name.
    stginfo[[length(stginfo) + 1]] <- cbind(list3, list2_support)

    list1 <- nextlist(list2)
    print(length(list1))
  }

  if (length(stginfo) == 0) {
    return(NULL)
  }
  stginfo
}
-
复制代码
|
-
总评分: 经验 + 60
论坛币 + 60
查看全部评分
|