【独家发布】Statistical Computing with R

0关注
2粉丝

已卖：362份资源

硕士生

46%

还不是VIP/贵宾

-

TA的文库 其他...

Nvivo(定性数据分析)

Observational Study

Mathematica NewOccidental

0%

威望: 0 级
论坛币: 3804 个
通用积分: 1.5037
学术水平: 14 点
热心指数: 9 点
信用等级: 11 点
经验: 1458 点
帖子: 102
精华: 1
在线时间: 19 小时
注册时间: 2006-5-13
最后登录: 2016-12-10

楼主

Eviewschen 发表于 2014-11-10 10:26:53 |AI写论文

是否 +2 论坛币

k人参与回答

经管之家送您一份

应届毕业生专属福利!

求职就业群

赵安豆老师微信：zhaoandou666

经管之家联合CDA

送您一个全额奖学金名额~ !

立即领取

感谢您参与论坛问题回答

经管之家送您两个论坛币！

+2 论坛币

http://personal.bgsu.edu/~mrizzo/SCR.htm

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

分享0 收藏2 回帖

关键词：Statistical statistica computing statistic Statist

本帖被以下文库推荐

· Computing NewOccidental|主题: 411, 订阅: 16

沙发

Eviewschen 发表于 2014-11-10 10:27:46

#######################################################
### Statistical Computing with R ###
### Maria L. Rizzo ###
### Chapman & Hall / CRC ###
### ISBN 9781584885450 ###
### ###
### R code for Chapter 3 Examples ###
#######################################################
### Example 3.2 (Inverse transform method, continuous case)
n <- 1000
u <- runif(n)
x <- u^(1/3)
hist(x, prob = TRUE, main = bquote(f(x)==3*x^2)) #density histogram of sample
y <- seq(0, 1, .01)
lines(y, 3*y^2) #density curve f(x)
### Example 3.4 (Two point distribution)
n <- 1000
p <- 0.4
u <- runif(n)
x <- as.integer(u > 0.6) #(u > 0.6) is a logical vector
mean(x)
var(x)
### Example 3.5 (Geometric distribution)
n <- 1000
p <- 0.25
u <- runif(n)
k <- ceiling(log(1-u) / log(1-p)) - 1
# more efficient
k <- floor(log(u) / log(1-p))
### Example 3.6 (Logarithmic distribution)
rlogarithmic <- function(n, theta) {
#returns a random logarithmic(theta) sample size n
u <- runif(n)
#set the initial length of cdf vector
N <- ceiling(-16 / log10(theta))
k <- 1:N
a <- -1/log(1-theta)
fk <- exp(log(a) + k * log(theta) - log(k))
Fk <- cumsum(fk)
x <- integer(n)
for (i in 1:n) {
x[i] <- as.integer(sum(u[i] > Fk)) #F^{-1}(u)-1
while (x[i] == N) {
#if x==N we need to extend the cdf
#very unlikely because N is large
logf <- log(a) + (N+1)*log(theta) - log(N+1)
fk <- c(fk, exp(logf))
Fk <- c(Fk, Fk[N] + fk[N+1])
N <- N + 1
x[i] <- as.integer(sum(u[i] > Fk))
}
}
x + 1
}
n <- 1000
theta <- 0.5
x <- rlogarithmic(n, theta)
#compute density of logarithmic(theta) for comparison
k <- sort(unique(x))
p <- -1 / log(1 - theta) * theta^k / k
se <- sqrt(p*(1-p)/n) #standard error
round(rbind(table(x)/n, p, se),3)
### Example 3.7 (Acceptance-rejection method)
n <- 1000
k <- 0 #counter for accepted
j <- 0 #iterations
y <- numeric(n)
while (k < n) {
u <- runif(1)
j <- j + 1
x <- runif(1) #random variate from g
if (x * (1-x) > u) {
#we accept x
k <- k + 1
y[k] <- x
}
}
j
#compare empirical and theoretical percentiles
p <- seq(.1, .9, .1)
Qhat <- quantile(y, p) #quantiles of sample
Q <- qbeta(p, 2, 2) #theoretical quantiles
se <- sqrt(p * (1-p) / (n * dbeta(Q, 2, 2)^2)) #see Ch. 2
round(rbind(Qhat, Q, se), 3)
### Example 3.8 (Beta distribution)
n <- 1000
a <- 3
b <- 2
u <- rgamma(n, shape=a, rate=1)
v <- rgamma(n, shape=b, rate=1)
x <- u / (u + v)
q <- qbeta(ppoints(n), a, b)
qqplot(q, x, cex=0.25, xlab="Beta(3, 2)", ylab="Sample")
abline(0, 1)
### Example 3.9 (Logarithmic distribution, version 2)
n <- 1000
theta <- 0.5
u <- runif(n) #generate logarithmic sample
v <- runif(n)
x <- floor(1 + log(v) / log(1 - (1 - theta)^u))
k <- 1:max(x) #calc. logarithmic probs.
p <- -1 / log(1 - theta) * theta^k / k
se <- sqrt(p*(1-p)/n)
p.hat <- tabulate(x)/n
print(round(rbind(p.hat, p, se), 3))
# The following function is a simple replacement for
# rlogarithmic in Example 3.6
rlogarithmic <- function(n, theta) {
stopifnot(all(theta > 0 & theta < 1))
th <- rep(theta, length=n)
u <- runif(n)
v <- runif(n)
x <- floor(1 + log(v) / log(1 - (1 - th)^u))
return(x)
}
### Example 3.10 (Chisquare)
n <- 1000
nu <- 2
X <- matrix(rnorm(n*nu), n, nu)^2 #matrix of sq. normals
#sum the squared normals across each row: method 1
y <- rowSums(X)
#method 2
y <- apply(X, MARGIN=1, FUN=sum) #a vector length n
mean(y)
mean(y^2)
### Example 3.11 (Convolutions and mixtures)
n <- 1000
x1 <- rgamma(n, 2, 2)
x2 <- rgamma(n, 2, 4)
s <- x1 + x2 #the convolution
u <- runif(n)
k <- as.integer(u > 0.5) #vector of 0's and 1's
x <- k * x1 + (1-k) * x2 #the mixture
par(mfcol=c(1,2)) #two graphs per page
hist(s, prob=TRUE)
hist(x, prob=TRUE)
par(mfcol=c(1,1)) #restore display
### Example 3.12 (Mixture of several gamma distributions)
# density estimates are plotted
n <- 5000
k <- sample(1:5, size=n, replace=TRUE, prob=(1:5)/15)
rate <- 1/k
x <- rgamma(n, shape=3, rate=rate)
#plot the density of the mixture
#with the densities of the components
plot(density(x), xlim=c(0,40), ylim=c(0,.3),
lwd=3, xlab="x", main="")
for (i in 1:5)
lines(density(rgamma(n, 3, 1/i)))
### Example 3.13 (Mixture of several gamma distributions)
n <- 5000
p <- c(.1,.2,.2,.3,.2)
lambda <- c(1,1.5,2,2.5,3)
k <- sample(1:5, size=n, replace=TRUE, prob=p)
rate <- lambda[k]
x <- rgamma(n, shape=3, rate=rate)
### Example 3.14 (Plot density of mixture)
f <- function(x, lambda, theta) {
#density of the mixture at the point x
sum(dgamma(x, 3, lambda) * theta)
}
p <- c(.1,.2,.2,.3,.2)
lambda <- c(1,1.5,2,2.5,3)
x <- seq(0, 8, length=200)
dim(x) <- length(x) #need for apply
#compute density of the mixture f(x) along x
y <- apply(x, 1, f, lambda=lambda, theta=p)
#plot the density of the mixture
plot(x, y, type="l", ylim=c(0,.85), lwd=3, ylab="Density")
for (j in 1:5) {
#add the j-th gamma density to the plot
y <- apply(x, 1, dgamma, shape=3, rate=lambda[j])
lines(x, y)
}
### Example 3.15 (Poisson-Gamma mixture)
#generate a Poisson-Gamma mixture
n <- 1000
r <- 4
beta <- 3
lambda <- rgamma(n, r, beta) #lambda is random
#now supply the sample of lambda's as the Poisson mean
x <- rpois(n, lambda) #the mixture
#compare with negative binomial
mix <- tabulate(x+1) / n
negbin <- round(dnbinom(0:max(x), r, beta/(1+beta)), 3)
se <- sqrt(negbin * (1 - negbin) / n)
round(rbind(mix, negbin, se), 3)
### Example 3.16 (Spectral decomposition method)
# mean and covariance parameters
mu <- c(0, 0)
Sigma <- matrix(c(1, .9, .9, 1), nrow = 2, ncol = 2)
rmvn.eigen <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
ev <- eigen(Sigma, symmetric = TRUE)
lambda <- ev$values
V <- ev$vectors
R <- V %*% diag(sqrt(lambda)) %*% t(V)
Z <- matrix(rnorm(n*d), nrow = n, ncol = d)
X <- Z %*% R + matrix(mu, n, d, byrow = TRUE)
X
}
# generate the sample
X <- rmvn.eigen(1000, mu, Sigma)
plot(X, xlab = "x", ylab = "y", pch = 20)
print(colMeans(X))
print(cor(X))
### Example 3.17 (SVD method)
rmvn.svd <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
S <- svd(Sigma)
R <- S$u %*% diag(sqrt(S$d)) %*% t(S$v) #sq. root Sigma
Z <- matrix(rnorm(n*d), nrow=n, ncol=d)
X <- Z %*% R + matrix(mu, n, d, byrow=TRUE)
X
}
### Example 3.18 (Choleski factorization method)
rmvn.Choleski <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
Q <- chol(Sigma) # Choleski factorization of Sigma
Z <- matrix(rnorm(n*d), nrow=n, ncol=d)
X <- Z %*% Q + matrix(mu, n, d, byrow=TRUE)
X
}
#generating the samples according to the mean and covariance
#structure as the four-dimensional iris virginica data
y <- subset(x=iris, Species=="virginica")[, 1:4]
mu <- colMeans(y)
Sigma <- cov(y)
mu
Sigma
#now generate MVN data with this mean and covariance
X <- rmvn.Choleski(200, mu, Sigma)
pairs(X)
### Example 3.19 (Comparing performance of MVN generators)
library(MASS)
library(mvtnorm)
n <- 100 #sample size
d <- 30 #dimension
N <- 2000 #iterations
mu <- numeric(d)
set.seed(100)
system.time(for (i in 1:N)
rmvn.eigen(n, mu, cov(matrix(rnorm(n*d), n, d))))
set.seed(100)
system.time(for (i in 1:N)
rmvn.svd(n, mu, cov(matrix(rnorm(n*d), n, d))))
set.seed(100)
system.time(for (i in 1:N)
rmvn.Choleski(n, mu, cov(matrix(rnorm(n*d), n, d))))
set.seed(100)
system.time(for (i in 1:N)
mvrnorm(n, mu, cov(matrix(rnorm(n*d), n, d))))
set.seed(100)
system.time(for (i in 1:N)
rmvnorm(n, mu, cov(matrix(rnorm(n*d), n, d))))
set.seed(100)
system.time(for (i in 1:N)
cov(matrix(rnorm(n*d), n, d)))
detach(package:MASS)
detach(package:mvtnorm)
### Example 3.20 (Multivariate normal mixture)
library(MASS) #for mvrnorm
#ineffecient version loc.mix.0 with loops
loc.mix.0 <- function(n, p, mu1, mu2, Sigma) {
#generate sample from BVN location mixture
X <- matrix(0, n, 2)
for (i in 1:n) {
k <- rbinom(1, size = 1, prob = p)
if (k)
X[i,] <- mvrnorm(1, mu = mu1, Sigma) else
X[i,] <- mvrnorm(1, mu = mu2, Sigma)
}
return(X)
}
#more efficient version
loc.mix <- function(n, p, mu1, mu2, Sigma) {
#generate sample from BVN location mixture
n1 <- rbinom(1, size = n, prob = p)
n2 <- n - n1
x1 <- mvrnorm(n1, mu = mu1, Sigma)
x2 <- mvrnorm(n2, mu = mu2, Sigma)
X <- rbind(x1, x2) #combine the samples
return(X[sample(1:n), ]) #mix them
}
x <- loc.mix(1000, .5, rep(0, 4), 2:5, Sigma = diag(4))
r <- range(x) * 1.2
par(mfrow = c(2, 2))
for (i in 1:4)
hist(x[ , i], xlim = r, ylim = c(0, .3), freq = FALSE,
main = "", breaks = seq(-5, 10, .5))
detach(package:MASS)
par(mfrow = c(1, 1))
### Example 3.21 (Generating variates on a sphere)
runif.sphere <- function(n, d) {
# return a random sample uniformly distributed
# on the unit sphere in R ^d
M <- matrix(rnorm(n*d), nrow = n, ncol = d)
L <- apply(M, MARGIN = 1,
FUN = function(x){sqrt(sum(x*x))})
D <- diag(1 / L)
U <- D %*% M
U
}
#generate a sample in d=2 and plot
X <- runif.sphere(200, 2)
par(pty = "s")
plot(X, xlab = bquote(x[1]), ylab = bquote(x[2]))
par(pty = "m")
### Example 3.22 (Poisson process)
lambda <- 2
t0 <- 3
Tn <- rexp(100, lambda) #interarrival times
Sn <- cumsum(Tn) #arrival times
n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
### Example 3.23 (Poisson process, cont.)
lambda <- 2
t0 <- 3
upper <- 100
pp <- numeric(10000)
for (i in 1:10000) {
N <- rpois(1, lambda * upper)
Un <- runif(N, 0, upper) #unordered arrival times
Sn <- sort(Un) #arrival times
n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
pp[i] <- n - 1 #arrivals in [0, t0]
}
#alternately, the loop can be replaced by replicate function
pp <- replicate(10000, expr = {
N <- rpois(1, lambda * upper)
Un <- runif(N, 0, upper) #unordered arrival times
Sn <- sort(Un) #arrival times
n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
n - 1 }) #arrivals in [0, t0]
c(mean(pp), var(pp))
### Example 3.24 (Nonhomogeneous Poisson process)
lambda <- 3
upper <- 100
N <- rpois(1, lambda * upper)
Tn <- rexp(N, lambda)
Sn <- cumsum(Tn)
Un <- runif(N)
keep <- (Un <= cos(Sn)^2) #indicator, as logical vector
Sn[keep]
round(Sn[keep], 4)
### Example 3.25 (Renewal process)
t0 <- 5
Tn <- rgeom(100, prob = .2) #interarrival times
Sn <- cumsum(Tn) #arrival times
n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
Nt0 <- replicate(1000, expr = {
Sn <- cumsum(rgeom(100, prob = .2))
min(which(Sn > t0)) - 1
})
table(Nt0)/1000
Nt0
t0 <- seq(0.1, 30, .1)
mt <- numeric(length(t0))
for (i in 1:length(t0)) {
mt[i] <- mean(replicate(1000,
{
Sn <- cumsum(rgeom(100, prob = .2))
min(which(Sn > t0[i])) - 1
}))
}
plot(t0, mt, type = "l", xlab = "t", ylab = "mean")
abline(0, .25)
### Example 3.26 (Symmetric random walk)
n <- 400
incr <- sample(c(-1, 1), size = n, replace = TRUE)
S <- as.integer(c(0, cumsum(incr)))
plot(0:n, S, type = "l", main = "", xlab = "i")
### Example 3.27 (Generator for the time until return to origin)
set.seed(12345)
#compute the probabilities directly
n <- 1:10000
p2n <- exp(lgamma(2*n-1)
- log(n) - (2*n-1)*log(2) - 2*lgamma(n))
#or compute using dbinom
P2n <- (.5/n) * dbinom(n-1, size = 2*n-2, prob = 0.5)
pP2n <- cumsum(P2n)
#given n compute the time of the last return to 0 in (0,n]
n <- 200
sumT <- 0
while (sumT <= n) {
u <- runif(1)
s <- sum(u > pP2n)
if (s == length(pP2n))
warning("T is truncated")
Tj <- 2 * (1 + s)
#print(c(Tj, sumT))
sumT <- sumT + Tj
}
sumT - Tj

复制代码

藤椅

Eviewschen 发表于 2014-11-10 10:28:59

#######################################################
### Statistical Computing with R ###
### Maria L. Rizzo ###
### Chapman & Hall / CRC ###
### ISBN 9781584885450 ###
### ###
### R code for Chapter 4 Examples ###
#######################################################
### Example 4.1 (Scatterplot matrix)
data(iris)
#virginica data in first 4 columns of the last 50 obs.
# not shown in text
pairs(iris[101:150, 1:4])
panel.d <- function(x, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1:2], 0, .5))
lines(density(x))
}
# Fig. 4.1
x <- scale(iris[101:150, 1:4])
r <- range(x)
pairs(x, diag.panel = panel.d, xlim = r, ylim = r)
library(lattice)
splom(iris[101:150, 1:4]) #plot 1
#for all 3 at once, in color, plot 2
splom(iris[,1:4], groups = iris$Species)
# Fig. 4.2
#for all 3 at once, black and white, plot 3
splom(~iris[1:4], groups = Species, data = iris,
col = 1, pch = c(1, 2, 3), cex = c(.5,.5,.5))
### Example 4.2 (Plot bivariate normal density)
#the standard BVN density
f <- function(x,y) {
z <- (1/(2*pi)) * exp(-.5 * (x^2 + y^2))
}
y <- x <- seq(-3, 3, length= 50)
z <- outer(x, y, f) #compute density for all (x,y)
persp(x, y, z) #the default plot
persp(x, y, z, theta = 45, phi = 30, expand = 0.6,
ltheta = 120, shade = 0.75, ticktype = "detailed",
xlab = "X", ylab = "Y", zlab = "f(x, y)")
### Example 4.3 (Add elements to perspective plot)
#store viewing transformation in M
persp(x, y, z, theta = 45, phi = 30,
expand = .4, box = FALSE) -> M
#add some points along a circle
a <- seq(-pi, pi, pi/16)
newpts <- cbind(cos(a), sin(a)) * 2
newpts <- cbind(newpts, 0, 1) #z=0, t=1
N <- newpts %*% M
points(N[,1]/N[,4], N[,2]/N[,4], col=2)
#add lines
x2 <- seq(-3, 3, .1)
y2 <- -x2^2 / 3
z2 <- dnorm(x2) * dnorm(y2)
N <- cbind(x2, y2, z2, 1) %*% M
lines(N[,1]/N[,4], N[,2]/N[,4], col=4)
#add text
x3 <- c(0, 3.1)
y3 <- c(0, -3.1)
z3 <- dnorm(x3) * dnorm(y3) * 1.1
N <- cbind(x3, y3, z3, 1) %*% M
text(N[1,1]/N[1,4], N[1,2]/N[1,4], "f(x,y)")
text(N[2,1]/N[2,4], N[2,2]/N[2,4], bquote(y==-x^2/3))
### Example 4.4 (Surface plot using wireframe(lattice))
library(lattice)
x <- y <- seq(-3, 3, length= 50)
xy <- expand.grid(x, y)
z <- (1/(2*pi)) * exp(-.5 * (xy[,1]^2 + xy[,2]^2))
wireframe(z ~ xy[,1] * xy[,2])
### Example 4.5 (3D scatterplot)
library(lattice)
attach(iris)
#basic 3 color plot with arrows along axes
print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width,
data=iris, groups=Species))
print(cloud(Sepal.Length ~ Petal.Length * Petal.Width,
data = iris, groups = Species, main = "1", pch=1:3,
scales = list(draw = FALSE), zlab = "SL",
screen = list(z = 30, x = -75, y = 0)),
split = c(1, 1, 2, 2), more = TRUE)
print(cloud(Sepal.Width ~ Petal.Length * Petal.Width,
data = iris, groups = Species, main = "2", pch=1:3,
scales = list(draw = FALSE), zlab = "SW",
screen = list(z = 30, x = -75, y = 0)),
split = c(2, 1, 2, 2), more = TRUE)
print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width,
data = iris, groups = Species, main = "3", pch=1:3,
scales = list(draw = FALSE), zlab = "PL",
screen = list(z = 30, x = -55, y = 0)),
split = c(1, 2, 2, 2), more = TRUE)
print(cloud(Petal.Width ~ Sepal.Length * Sepal.Width,
data = iris, groups = Species, main = "4", pch=1:3,
scales = list(draw = FALSE), zlab = "PW",
screen = list(z = 30, x = -55, y = 0)),
split = c(2, 2, 2, 2))
detach(iris)
### Example 4.6 (Contour plot)
#contour plot with labels
contour(volcano, asp = 1, labcex = 1)
#another version from lattice package
library(lattice)
contourplot(volcano) #similar to above
### Example 4.7 (Filled contour plots)
image(volcano, col = terrain.colors(100), axes = FALSE)
contour(volcano, levels = seq(100,200,by = 10), add = TRUE)
filled.contour(volcano, color = terrain.colors, asp = 1)
levelplot(volcano, scales = list(draw = FALSE),
xlab = "", ylab = "")
### Example 4.8 (2D histogram)
library(hexbin)
x <- matrix(rnorm(4000), 2000, 2)
plot(hexbin(x[,1], x[,2]))
### Example 4.9 (Andrews curves)
library(DAAG)
attach(leafshape17)
f <- function(a, v) {
#Andrews curve f(a) for a data vector v in R^3
v[1]/sqrt(2) + v[2]*sin(a) + v[3]*cos(a)
}
#scale data to range [-1, 1]
x <- cbind(bladelen, petiole, bladewid)
n <- nrow(x)
mins <- apply(x, 2, min) #column minimums
maxs <- apply(x, 2, max) #column maximums
r <- maxs - mins #column ranges
y <- sweep(x, 2, mins) #subtract column mins
y <- sweep(y, 2, r, "/") #divide by range
x <- 2 * y - 1 #now has range [-1, 1]
#set up plot window, but plot nothing yet
plot(0, 0, xlim = c(-pi, pi), ylim = c(-3,3),
xlab = "t", ylab = "Andrews Curves",
main = "", type = "n")
#now add the Andrews curves for each observation
#line type corresponds to leaf architecture
#0=orthotropic, 1=plagiotropic
a <- seq(-pi, pi, len=101)
dim(a) <- length(a)
for (i in 1:n) {
g <- arch[i] + 1
y <- apply(a, MARGIN = 1, FUN = f, v = x[i,])
lines(a, y, lty = g)
}
legend(3, c("Orthotropic", "Plagiotropic"), lty = 1:2)
detach(leafshape17)
### Example 4.10 (Parallel coordinates)
library(MASS)
library(lattice)
trellis.device(color = FALSE) #black and white display
x <- crabs[seq(5, 200, 5), ] #get every fifth obs.
parallel(~x[4:8] | sp*sex, x)
trellis.device(color = FALSE) #black and white display
x <- crabs[seq(5, 200, 5), ] #get every fifth obs.
a <- x$CW * x$CL #area of carapace
x[4:8] <- x[4:8] / sqrt(a) #adjust for size
parallel(~x[4:8] | sp*sex, x)
### Example 4.11 (Segment plot)
#segment plot
library(MASS) #for crabs data
attach(crabs)
x <- crabs[seq(5, 200, 5), ] #get every fifth obs.
x <- subset(x, sex == "M") #keep just the males
a <- x$CW * x$CL #area of carapace
x[4:8] <- x[4:8] / sqrt(a) #adjust for size
#use default color palette or other colors
palette(gray(seq(.4, .95, len = 5))) #use gray scale
#palette(rainbow(6)) #or use color
stars(x[4:8], draw.segments = TRUE,
labels = x$sp, nrow = 4,
ylim = c(-2,10), key.loc = c(3,-1))
#after viewing, restore the default colors
palette("default")
detach(crabs)

复制代码

板凳

Eviewschen 发表于 2014-11-10 10:32:51

#######################################################
### Statistical Computing with R ###
### Maria L. Rizzo ###
### Chapman & Hall / CRC ###
### ISBN 9781584885450 ###
### ###
### R code for Chapter 5 Examples ###
#######################################################
### Example 5.1 (Simple Monte Carlo integration)
m <- 10000
x <- runif(m)
theta.hat <- mean(exp(-x))
print(theta.hat)
print(1 - exp(-1))
### Example 5.2 (Simple Monte Carlo integration, cont.)
m <- 10000
x <- runif(m, min=2, max=4)
theta.hat <- mean(exp(-x)) * 2
print(theta.hat)
print(exp(-2) - exp(-4))
### Example 5.3 (Monte Carlo integration, unbounded interval)
x <- seq(.1, 2.5, length = 10)
m <- 10000
u <- runif(m)
cdf <- numeric(length(x))
for (i in 1:length(x)) {
g <- x[i] * exp(-(u * x[i])^2 / 2)
cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5
}
Phi <- pnorm(x)
print(round(rbind(x, cdf, Phi), 3))
### Example 5.4 (Example 5.3, cont.)
x <- seq(.1, 2.5, length = 10)
m <- 10000
z <- rnorm(m)
dim(x) <- length(x)
p <- apply(x, MARGIN = 1,
FUN = function(x, z) {mean(z < x)}, z = z)
Phi <- pnorm(x)
print(round(rbind(x, p, Phi), 3))
### Example 5.5 (Error bounds for MC integration)
x <- 2
m <- 10000
z <- rnorm(m)
g <- (z < x) #the indicator function
v <- mean((g - mean(g))^2) / m
cdf <- mean(g)
c(cdf, v)
c(cdf - 1.96 * sqrt(v), cdf + 1.96 * sqrt(v))
### Example 5.6 (Antithetic variables)
MC.Phi <- function(x, R = 10000, antithetic = TRUE) {
u <- runif(R/2)
if (!antithetic) v <- runif(R/2) else
v <- 1 - u
u <- c(u, v)
cdf <- numeric(length(x))
for (i in 1:length(x)) {
g <- x[i] * exp(-(u * x[i])^2 / 2)
cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5
}
cdf
}
x <- seq(.1, 2.5, length=5)
Phi <- pnorm(x)
set.seed(123)
MC1 <- MC.Phi(x, anti = FALSE)
set.seed(123)
MC2 <- MC.Phi(x)
print(round(rbind(x, MC1, MC2, Phi), 5))
m <- 1000
MC1 <- MC2 <- numeric(m)
x <- 1.95
for (i in 1:m) {
MC1[i] <- MC.Phi(x, R = 1000, anti = FALSE)
MC2[i] <- MC.Phi(x, R = 1000)
}
print(sd(MC1))
print(sd(MC2))
print((var(MC1) - var(MC2))/var(MC1))
### Example 5.7 (Control variate)
m <- 10000
a <- - 12 + 6 * (exp(1) - 1)
U <- runif(m)
T1 <- exp(U) #simple MC
T2 <- exp(U) + a * (U - 1/2) #controlled
mean(T1)
mean(T2)
(var(T1) - var(T2)) / var(T1)
### Example 5.8 (MC integration using control variates)
f <- function(u)
exp(-.5)/(1+u^2)
g <- function(u)
exp(-u)/(1+u^2)
set.seed(510) #needed later
u <- runif(10000)
B <- f(u)
A <- g(u)
cor(A, B)
a <- -cov(A,B) / var(B) #est of c*
a
m <- 100000
u <- runif(m)
T1 <- g(u)
T2 <- T1 + a * (f(u) - exp(-.5)*pi/4)
c(mean(T1), mean(T2))
c(var(T1), var(T2))
(var(T1) - var(T2)) / var(T1)
### Example 5.9 (Control variate and regression)
set.seed(510)
u <- runif(10000)
f <- exp(-.5)/(1+u^2)
g <- exp(-u)/(1+u^2)
c.star <- - lm(g ~ f)$coeff[2] # beta[1]
mu <- exp(-.5)*pi/4
c.star
u <- runif(10000)
f <- exp(-.5)/(1+u^2)
g <- exp(-u)/(1+u^2)
L <- lm(g ~ f)
theta.hat <- sum(L$coeff * c(1, mu)) #pred. value at mu
theta.hat
summary(L)$sigma^2
summary(L)$r.squared
### Example 5.10 (Choice of the importance function)
#code for plot is at the end of the file
m <- 10000
theta.hat <- se <- numeric(5)
g <- function(x) {
exp(-x - log(1+x^2)) * (x > 0) * (x < 1)
}
x <- runif(m) #using f0
fg <- g(x)
theta.hat[1] <- mean(fg)
se[1] <- sd(fg)
x <- rexp(m, 1) #using f1
fg <- g(x) / exp(-x)
theta.hat[2] <- mean(fg)
se[2] <- sd(fg)
x <- rcauchy(m) #using f2
i <- c(which(x > 1), which(x < 0))
x[i] <- 2 #to catch overflow errors in g(x)
fg <- g(x) / dcauchy(x)
theta.hat[3] <- mean(fg)
se[3] <- sd(fg)
u <- runif(m) #f3, inverse transform method
x <- - log(1 - u * (1 - exp(-1)))
fg <- g(x) / (exp(-x) / (1 - exp(-1)))
theta.hat[4] <- mean(fg)
se[4] <- sd(fg)
u <- runif(m) #f4, inverse transform method
x <- tan(pi * u / 4)
fg <- g(x) / (4 / ((1 + x^2) * pi))
theta.hat[5] <- mean(fg)
se[5] <- sd(fg)
rbind(theta.hat, se)
### Example 5.11 (Example 5.10, cont.)
M <- 20 #number of replicates
T2 <- numeric(4)
estimates <- matrix(0, 10, 2)
g <- function(x) {
exp(-x - log(1+x^2)) * (x > 0) * (x < 1) }
for (i in 1:10) {
estimates[i, 1] <- mean(g(runif(M)))
T2[1] <- mean(g(runif(M/4, 0, .25)))
T2[2] <- mean(g(runif(M/4, .25, .5)))
T2[3] <- mean(g(runif(M/4, .5, .75)))
T2[4] <- mean(g(runif(M/4, .75, 1)))
estimates[i, 2] <- mean(T2)
}
estimates
apply(estimates, 2, mean)
apply(estimates, 2, var)
### Example 5.12 (Examples 5.10-5.11, cont.)
M <- 10000 #number of replicates
k <- 10 #number of strata
r <- M / k #replicates per stratum
N <- 50 #number of times to repeat the estimation
T2 <- numeric(k)
estimates <- matrix(0, N, 2)
g <- function(x) {
exp(-x - log(1+x^2)) * (x > 0) * (x < 1)
}
for (i in 1:N) {
estimates[i, 1] <- mean(g(runif(M)))
for (j in 1:k)
T2[j] <- mean(g(runif(M/k, (j-1)/k, j/k)))
estimates[i, 2] <- mean(T2)
}
apply(estimates, 2, mean)
apply(estimates, 2, var)
### Plot importance functions in Figures 5.1(a) and 5.1.(b)
#par(ask = TRUE) #uncomment to pause between graphs
x <- seq(0, 1, .01)
w <- 2
f1 <- exp(-x)
f2 <- (1 / pi) / (1 + x^2)
f3 <- exp(-x) / (1 - exp(-1))
f4 <- 4 / ((1 + x^2) * pi)
g <- exp(-x) / (1 + x^2)
#for color change lty to col
#figure (a)
plot(x, g, type = "l", main = "", ylab = "",
ylim = c(0,2), lwd = w)
lines(x, g/g, lty = 2, lwd = w)
lines(x, f1, lty = 3, lwd = w)
lines(x, f2, lty = 4, lwd = w)
lines(x, f3, lty = 5, lwd = w)
lines(x, f4, lty = 6, lwd = w)
legend("topright", legend = c("g", 0:4),
lty = 1:6, lwd = w, inset = 0.02)
#figure (b)
plot(x, g, type = "l", main = "", ylab = "",
ylim = c(0,3.2), lwd = w, lty = 2)
lines(x, g/f1, lty = 3, lwd = w)
lines(x, g/f2, lty = 4, lwd = w)
lines(x, g/f3, lty = 5, lwd = w)
lines(x, g/f4, lty = 6, lwd = w)
legend("topright", legend = c(0:4),
lty = 2:6, lwd = w, inset = 0.02)

复制代码

报纸

Eviewschen 发表于 2014-11-10 10:34:36

#######################################################
### Statistical Computing with R ###
### Maria L. Rizzo ###
### Chapman & Hall / CRC ###
### ISBN 9781584885450 ###
### ###
### R code for Chapter 6 Examples ###
#######################################################
### Example 6.1 (Basic Monte Carlo estimation)
m <- 1000
g <- numeric(m)
for (i in 1:m) {
x <- rnorm(2)
g[i] <- abs(x[1] - x[2])
}
est <- mean(g)
est
### Example 6.2 (Estimating the MSE of a trimmed mean)
n <- 20
m <- 1000
tmean <- numeric(m)
for (i in 1:m) {
x <- sort(rnorm(n))
tmean[i] <- sum(x[2:(n-1)]) / (n-2)
}
mse <- mean(tmean^2)
mse
sqrt(sum((tmean - mean(tmean))^2)) / m #se
n <- 20
m <- 1000
tmean <- numeric(m)
for (i in 1:m) {
x <- sort(rnorm(n))
tmean[i] <- median(x)
}
mse <- mean(tmean^2)
mse
sqrt(sum((tmean - mean(tmean))^2)) / m #se
### Example 6.3 (MSE of a trimmed mean, cont.)
set.seed(522)
n <- 20
K <- n/2 - 1
m <- 1000
mse <- matrix(0, n/2, 6)
trimmed.mse <- function(n, m, k, p) {
#MC est of mse for k-level trimmed mean of
#contaminated normal pN(0,1) + (1-p)N(0,100)
tmean <- numeric(m)
for (i in 1:m) {
sigma <- sample(c(1, 10), size = n,
replace = TRUE, prob = c(p, 1-p))
x <- sort(rnorm(n, 0, sigma))
tmean[i] <- sum(x[(k+1):(n-k)]) / (n-2*k)
}
mse.est <- mean(tmean^2)
se.mse <- sqrt(mean((tmean-mean(tmean))^2)) / sqrt(m)
return(c(mse.est, se.mse))
}
for (k in 0:K) {
mse[k+1, 1:2] <- trimmed.mse(n=n, m=m, k=k, p=1.0)
mse[k+1, 3:4] <- trimmed.mse(n=n, m=m, k=k, p=.95)
mse[k+1, 5:6] <- trimmed.mse(n=n, m=m, k=k, p=.9)
}
### Example 6.4 (Confidence interval for variance)
n <- 20
alpha <- .05
x <- rnorm(n, mean=0, sd=2)
UCL <- (n-1) * var(x) / qchisq(alpha, df=n-1)
### Example 6.5 (MC estimate of confidence level)
n <- 20
alpha <- .05
UCL <- replicate(1000, expr = {
x <- rnorm(n, mean = 0, sd = 2)
(n-1) * var(x) / qchisq(alpha, df = n-1)
} )
#count the number of intervals that contain sigma^2=4
sum(UCL > 4)
#or compute the mean to get the confidence level
mean(UCL > 4)
### Example 6.6 (Empirical confidence level)
n <- 20
alpha <- .05
UCL <- replicate(1000, expr = {
x <- rchisq(n, df = 2)
(n-1) * var(x) / qchisq(alpha, df = n-1)
} )
sum(UCL > 4)
mean(UCL > 4)
### Example 6.7 (Empirical Type I error rate)
n <- 20
alpha <- .05
mu0 <- 500
sigma <- 100
m <- 10000 #number of replicates
p <- numeric(m) #storage for p-values
for (j in 1:m) {
x <- rnorm(n, mu0, sigma)
ttest <- t.test(x, alternative = "greater", mu = mu0)
p[j] <- ttest$p.value
}
p.hat <- mean(p < alpha)
se.hat <- sqrt(p.hat * (1 - p.hat) / m)
print(c(p.hat, se.hat))
### Example 6.8 (Skewness test of normality)
n <- c(10, 20, 30, 50, 100, 500) #sample sizes
cv <- qnorm(.975, 0, sqrt(6/n)) #crit. values for each n
sk <- function(x) {
#computes the sample skewness coeff.
xbar <- mean(x)
m3 <- mean((x - xbar)^3)
m2 <- mean((x - xbar)^2)
return( m3 / m2^1.5 )
}
#n is a vector of sample sizes
#we are doing length(n) different simulations
p.reject <- numeric(length(n)) #to store sim. results
m <- 10000 #num. repl. each sim.
for (i in 1:length(n)) {
sktests <- numeric(m) #test decisions
for (j in 1:m) {
x <- rnorm(n[i])
#test decision is 1 (reject) or 0
sktests[j] <- as.integer(abs(sk(x)) >= cv[i] )
}
p.reject[i] <- mean(sktests) #proportion rejected
}
p.reject
### Example 6.9 (Empirical power)
n <- 20
m <- 1000
mu0 <- 500
sigma <- 100
mu <- c(seq(450, 650, 10)) #alternatives
M <- length(mu)
power <- numeric(M)
for (i in 1:M) {
mu1 <- mu[i]
pvalues <- replicate(m, expr = {
#simulate under alternative mu1
x <- rnorm(n, mean = mu1, sd = sigma)
ttest <- t.test(x,
alternative = "greater", mu = mu0)
ttest$p.value } )
power[i] <- mean(pvalues <= .05)
}
par(ask = TRUE)
library(Hmisc) #for errbar
plot(mu, power)
abline(v = mu0, lty = 1)
abline(h = .05, lty = 1)
#add standard errors
se <- sqrt(power * (1-power) / m)
errbar(mu, power, yplus = power+se, yminus = power-se,
xlab = bquote(theta))
lines(mu, power, lty=3)
detach(package:Hmisc)
par(ask = FALSE)
### Example 6.10 (Power of the skewness test of normality)
alpha <- .1
n <- 30
m <- 2500
epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05))
N <- length(epsilon)
pwr <- numeric(N)
#critical value for the skewness test
cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3))))
for (j in 1:N) { #for each epsilon
e <- epsilon[j]
sktests <- numeric(m)
for (i in 1:m) { #for each replicate
sigma <- sample(c(1, 10), replace = TRUE,
size = n, prob = c(1-e, e))
x <- rnorm(n, 0, sigma)
sktests[i] <- as.integer(abs(sk(x)) >= cv)
}
pwr[j] <- mean(sktests)
}
#plot power vs epsilon
plot(epsilon, pwr, type = "b",
xlab = bquote(epsilon), ylim = c(0,1))
abline(h = .1, lty = 3)
se <- sqrt(pwr * (1-pwr) / m) #add standard errors
lines(epsilon, pwr+se, lty = 3)
lines(epsilon, pwr-se, lty = 3)
### Example 6.11 (Power comparison of tests of normality)
#only one loop, for epsilon=0.1, was shown in the text
#the simulation below takes several minutes to run
# initialize input and output
library(energy)
alpha <- .1
n <- 30
m <- 500 #try small m for a trial run
test1 <- test2 <- test3 <- numeric(m)
#critical value for the skewness test
cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3))))
sim <- matrix(0, 11, 4)
# estimate power
for (i in 0:10) {
epsilon <- i * .1
for (j in 1:m) {
e <- epsilon
sigma <- sample(c(1, 10), replace = TRUE,
size = n, prob = c(1-e, e))
x <- rnorm(n, 0, sigma)
test1[j] <- as.integer(abs(sk(x)) >= cv)
test2[j] <- as.integer(
shapiro.test(x)$p.value <= alpha)
test3[j] <- as.integer(
mvnorm.etest(x, R=200)$p.value <= alpha)
}
print(c(epsilon, mean(test1), mean(test2), mean(test3)))
sim[i+1, ] <- c(epsilon, mean(test1), mean(test2), mean(test3))
}
detach(package:energy)
# plot the empirical estimates of power
plot(sim[,1], sim[,2], ylim = c(0, 1), type = "l",
xlab = bquote(epsilon), ylab = "power")
lines(sim[,1], sim[,3], lty = 2)
lines(sim[,1], sim[,4], lty = 4)
abline(h = alpha, lty = 3)
legend("topright", 1, c("skewness", "S-W", "energy"),
lty = c(1,2,4), inset = .02)
### Example 6.12 (Count Five test statistic)
x1 <- rnorm(20, 0, sd = 1)
x2 <- rnorm(20, 0, sd = 1.5)
y <- c(x1, x2)
group <- rep(1:2, each = length(x1))
boxplot(y ~ group, boxwex = .3, xlim = c(.5, 2.5), main = "")
points(group, y)
# now identify the extreme points
range(x1)
range(x2)
i <- which(x1 < min(x2))
j <- which(x2 > max(x1))
x1[i]
x2[j]
out1 <- sum(x1 > max(x2)) + sum(x1 < min(x2))
out2 <- sum(x2 > max(x1)) + sum(x2 < min(x1))
max(c(out1, out2))
### Example 6.13 (Count Five test statistic, cont.)
maxout <- function(x, y) {
X <- x - mean(x)
Y <- y - mean(y)
outx <- sum(X > max(Y)) + sum(X < min(Y))
outy <- sum(Y > max(X)) + sum(Y < min(X))
return(max(c(outx, outy)))
}
n1 <- n2 <- 20
mu1 <- mu2 <- 0
sigma1 <- sigma2 <- 1
m <- 1000
# generate samples under H0
stat <- replicate(m, expr={
x <- rnorm(n1, mu1, sigma1)
y <- rnorm(n2, mu2, sigma2)
maxout(x, y)
})
print(cumsum(table(stat)) / m)
print(quantile(stat, c(.8, .9, .95)))
### Example 6.14 (Count Five test)
count5test <- function(x, y) {
X <- x - mean(x)
Y <- y - mean(y)
outx <- sum(X > max(Y)) + sum(X < min(Y))
outy <- sum(Y > max(X)) + sum(Y < min(X))
# return 1 (reject) or 0 (do not reject H0)
return(as.integer(max(c(outx, outy)) > 5))
}
n1 <- n2 <- 20
mu1 <- mu2 <- 0
sigma1 <- sigma2 <- 1
m <- 10000
tests <- replicate(m, expr = {
x <- rnorm(n1, mu1, sigma1)
y <- rnorm(n2, mu2, sigma2)
x <- x - mean(x) #centered by sample mean
y <- y - mean(y)
count5test(x, y)
} )
alphahat <- mean(tests)
print(alphahat)
### Example 6.15 (Count Five test, cont.)
n1 <- 20
n2 <- 30
mu1 <- mu2 <- 0
sigma1 <- sigma2 <- 1
m <- 10000
alphahat <- mean(replicate(m, expr={
x <- rnorm(n1, mu1, sigma1)
y <- rnorm(n2, mu2, sigma2)
x <- x - mean(x) #centered by sample mean
y <- y - mean(y)
count5test(x, y)
}))
print(alphahat)
### Example 6.16 (Count Five, cont.)
# generate samples under H1 to estimate power
sigma1 <- 1
sigma2 <- 1.5
power <- mean(replicate(m, expr={
x <- rnorm(20, 0, sigma1)
y <- rnorm(20, 0, sigma2)
count5test(x, y)
}))
print(power)

复制代码

地板

jiangqing001 发表于 2014-11-21 16:57:55

牛啊！！

7楼

lyslz 发表于 2014-11-24 22:16:49

zap na er ne ?

8楼

潭生.经济学笔记 发表于 2014-12-1 16:10:36

【独家发布】Statistical Computing with R [

9楼

潭生.经济学笔记 发表于 2014-12-1 16:59:29

Statistical Computing with R

加关注串个门加好友发消息 0关注 0粉丝禁止访问 Elodie1120 当前离线阅读权限 0 威望 0 级论坛币 12 个通用积分 1.0000 学术水平 0 点热心指数 0 点信用等级 0 点经验 196 点帖子 2 精华 0 在线时间 15 小时注册时间 2015-9-9 最后登录 2024-12-3 雷达卡	10楼 Elodie1120 发表于 2016-10-24 15:04:54 提示: 作者被禁止或删除内容自动屏蔽

	回复举报

[休闲其它] 【独家发布】Statistical Computing with R [推广有奖]

经管之家送您一份

经管之家联合CDA

感谢您参与论坛问题回答

扫码加我拉你入群

相关帖子

本帖被以下文库推荐

浏览过的帖子

浏览过的版块

本版微信群

[休闲其它] 【独家发布】Statistical Computing with R [推广有奖]

经管之家送您一份

经管之家联合CDA

感谢您参与论坛问题回答

扫码加我 拉你入群

相关帖子

本帖被以下文库推荐

浏览过的帖子

浏览过的版块

本版微信群

扫码加我拉你入群