受访者驱动抽样(RDS)是一种专门针对隐匿人群的抽样方法,常用于跨性别者、暗娼、MSM等因耻辱感和法律制度的约束难以识别和接触的人群,并逐渐应用于一般人群。这里给出国外学者写的一个相关R程序:
基本参数设定:
n <- 20000 #pop size
ns <- 500 # rds sample size
seeds <- 7 # #of seeds
#d <- round(rlnorm(n,meanlog = 1)) + 1
d <- round(rexp(n,1/5)) + 1 #heavy tailed degrees
#d <- rpois(n,lambda = 5) + 1 #light tail
#d <- rep(10, n)
g <- rep(1,n)
首先构造邻接矩阵的函数:
makeGraph <- function(d){
n <- length(d)
edges <- list()
d1 <- d
while(sum(d1)>0.5){
l <- sample.int(n,1,prob=d1)
dt <- d1
dt[l] <- d1[l] - 1
if(sum(dt) > 0.5){
k <- sample.int(n,1,prob=dt)
v <- TRUE
d1[k] <- d1[k] - 1
}else{
k <- sample.int(n,1,prob=d)
v <- FALSE
}
d1[l] <- d1[l] - 1
edges[[length(edges)+1]] <- c(l,k,v)
}
el <- do.call(rbind,edges)
el
}
el <- makeGraph(d) #el即为相关联的矩阵
构造RDS抽样函数:
sampRDS <- function(el, d, ns, g, ss, biased=TRUE,pr = c(0,.1,.9)){
maxR <- length(pr) -1
n <- length(g)
ml <- if(is.factor(g)) max(levels(g)) else max(g)
if(biased){
seeds <- sample.int(n,ns,prob=as.numeric(as.factor(g))-1)
}else{
seeds <- sample.int(n,ns, prob = d)
}
samp <- seeds
recr <- rep(-1,ns)
time <- 0 + (1:ns)/10000000
rcTime <- rexp(ns)
v <- rep(-1,ns)
t1 <- time
while(length(samp) < ss){
subjIndex <- which.min(t1 + rcTime)
if(length(subjIndex)==0){
print("redraw")
subj <- sample( (1:n)[-samp],1)
samp <- c(samp,subj)
recr <- c(recr,-1)
time <- c(time,max(time+1))
rcTime <- c(rcTime,rexp(1))
t1 <- c(t1,max(time+1))
v <- c(v,-1)
}else{
t <- t1[subjIndex] + rcTime[subjIndex]
t1[subjIndex] <- NA
subj <- samp[subjIndex]
nr <- sample(0:maxR,1,replace=FALSE,prob=pr)
nbrs <- rbind(el[el[,1]==subj,2:3,drop=FALSE],
el[el[,2]==subj & el[,3]>0.5,c(1,3),drop=FALSE]
)
nbrs <- nbrs[!(nbrs[,1] %in% samp) & nbrs[,1]!=subj,,drop=FALSE]
nr <- min(nr,nrow(nbrs))
if(nr>0){
s <- sample.int(nrow(nbrs),nr,replace=FALSE)
s <- s[!duplicated(nbrs[s,1])]
nr <- length(s)
samp <- c(samp,nbrs[s,1])
recr <- c(recr,rep(subj,nr))
tm <- t + t + (0:(nr-1)) / 1000000
time <- c(time,tm)
t1 <- c(t1,tm)
rcTime <- c(rcTime,rexp(nr))
v <- c(v,nbrs[s,2])
}
}
}
data.frame(subject=samp,recruiter=recr,time=time,v=v)
}
rds <- sampRDS(el, d, seeds,g,ns,FALSE, pr = c(0,.1,.9))
基本函数如上。


雷达卡




京公网安备 11010802022788号







