阅读权限 255 威望 0 级论坛币 49957 个 通用积分 79.5487 学术水平 253 点 热心指数 300 点 信用等级 208 点 经验 41518 点 帖子 3256 精华 14 在线时间 766 小时 注册时间 2006-5-4 最后登录 2022-11-6
## ----module, echo=FALSE, results="asis"----------------------------------
Module <- "BayesO"
cat(paste0("\\newcommand{\\Module}{", Module, "}"))
## ----setup, child="mycourse.Rnw"-----------------------------------------
## ----setup_options, include=FALSE----------------------------------------
library(knitr)
library(xtable)
opts_chunk$set(cache=FALSE)
opts_chunk$set(out.width='0.8\\textwidth')
opts_chunk$set(fig.align='center')
opts_chunk$set(src.top=NULL)
opts_chunk$set(src.bot=NULL)
opts_chunk$set(out.lines=4)
opts_chunk$set(out.truncate=80)
opts_chunk$set(fig.path=sprintf("figures/%s/", Module))
opts_chunk$set(cache.path=sprintf("cache/%s/", Module))
opts_chunk$set(bib.file=paste0(Module, ".bib"))
# Leave code as I have formatted it.
opts_chunk$set(tidy=FALSE)
# Hooks
# Allow auto crop of base graphics plots when crop=TRUE.
knit_hooks$set(crop=hook_pdfcrop)
# Truncate long lines and long output
hook_output <- knit_hooks$get("output")
hook_source <- knit_hooks$get("source")
knit_hooks$set(output=function(x, options)
{
if (options$results != "asis")
{
# Split string into separate lines.
x <- unlist(stringr::str_split(x, "\n"))
# Trim to the number of lines specified.
if (!is.null(n <- options$out.lines))
{
if (length(x) > n)
{
# Truncate the output.
x <- c(head(x, n), "....\n")
}
}
# Truncate each line to length specified.
if (!is.null(m <- options$out.truncate))
{
len <- nchar(x)
x[len>m] <- paste0(substr(x[len>m], 0, m-3), "...")
}
# Paste lines back together.
x <- paste(x, collapse="\n")
# Replace ' = ' with '=' - my preference. Hopefully won't
# affect things inappropriately.
x <- gsub(" = ", "=", x)
}
hook_output(x, options)
},
source=function(x, options)
{
# Split string into separate lines.
x <- unlist(stringr::str_split(x, "\n"))
# Trim to the number of lines specified.
if (!is.null(n <- options$src.top))
{
if (length(x) > n)
{
# Truncate the output.
if (is.null(m <-options$src.bot)) m <- 0
x <- c(head(x, n+1), "\n....\n", tail(x, m+2))
}
}
# Paste lines back together.
x <- paste(x, collapse="\n")
hook_source(x, options)
})
# Optionally allow R Code chunks to be environments so we can refer to them.
knit_hooks$set(rcode=function(before, options, envir)
{
if (before)
sprintf('\\begin{rcode}\\label{%s}\\hfill{}', options$label)
else
'\\end{rcode}'
})
## ----load_packages, message=FALSE----------------------------------------
library(rattle) # weather and normVarNames()
library(randomForest) # na.roughfix()
library(e1071) # naiveBayes()
library(ROCR) # prediction()
## ----common_intro, child='documentation.Rnw', eval=TRUE------------------
## ----help_library, eval=FALSE, tidy=FALSE--------------------------------
## ?read.csv
## ----help_package, eval=FALSE--------------------------------------------
## library(help=rattle)
## ----record_start_time, echo=FALSE---------------------------------------
start.time <- proc.time()
## ----generate_bib, echo=FALSE, message=FALSE, warning=FALSE--------------
# Write all packages in the current session to a bib file
if (is.null(opts_chunk$get("bib.file"))) opts_chunk$set(bib.file="Course.bib")
write_bib(sub("^.*/", "", grep("^/", searchpaths(), value=TRUE)),
file=opts_chunk$get("bib.file"))
system(paste("cat extra.bib >>", opts_chunk$get("bib.file")))
# Fix up specific issues.
# R-randomForest
system(paste("perl -pi -e 's|Fortran original by Leo Breiman",
"and Adele Cutler and R port by|Leo Breiman and",
"Adele Cutler and|'", opts_chunk$get("bib.file")))
# R-C50
system(paste("perl -pi -e 's|. C code for C5.0 by R. Quinlan|",
" and J. Ross Quinlan|'", opts_chunk$get("bib.file")))
# R-caret
system(paste("perl -pi -e 's|. Contributions from|",
" and|'", opts_chunk$get("bib.file")))
# Me
system(paste("perl -pi -e 's|Graham Williams|",
"Graham J Williams|'", opts_chunk$get("bib.file")))
## ----bayes_prepare_data_weather, message=FALSE---------------------------
library(rattle) # Normalise names normVarNames() and weather dataset.
library(randomForest) # Impute missing using na.roughfix().
dsname <- "weather"
ds <- get(dsname)
names(ds) <- normVarNames(names(ds))
vars <- names(ds)
target <- "rain_tomorrow"
risk <- "risk_mm"
id <- c("date", "location")
ignore <- union(id, if (exists("risk")) risk)
vars <- setdiff(vars, ignore)
inputs <- setdiff(vars, target)
numi <- which(sapply(ds[inputs], is.numeric))
numc <- names(numi)
cati <- which(sapply(ds[inputs], is.factor))
catc <- names(cati)
ds[numc] <- na.roughfix(ds[numc]) # Impute missing values, roughly.
ds[target] <- as.factor(ds[[target]]) # Ensure the target is categoric.
nobs <- nrow(ds)
form <- formula(paste(target, "~ ."))
set.seed(42)
train <- sample(nobs, 0.7*nobs)
test <- setdiff(seq_len(nobs), train)
actual <- ds[test, target]
risks <- ds[test, risk]
## ----check_dataset, out.lines=5------------------------------------------
dim(ds)
names(ds)
head(ds)
tail(ds)
str(ds)
summary(ds)
## ----build_nb_model, out.lines=26----------------------------------------
library(e1071)
model <- naiveBayes(form, data=ds[train, vars])
model
## ----evaluate_nb_model, message=FALSE, out.lines=NULL--------------------
classes <- predict(model, ds[test, vars], type="class")
acc <- sum(classes == actual, na.rm=TRUE)/length(actual)
err <- sum(classes != actual, na.rm=TRUE)/length(actual)
predicted <- predict(model, ds[test, vars], type="raw")[,2]
pred <- prediction(predicted, ds[test, target])
ate <- attr(performance(pred, "auc"), "y.values")[[1]]
riskchart(predicted, actual, risks)
round(table(actual, classes, dnn=c("Actual", "Predicted"))/length(actual), 2)
## ----common_outtro, child="finale.Rnw", eval=TRUE------------------------
## ----syinfo, child="sysinfo.Rnw", eval=TRUE------------------------------
## ----echo=FALSE, message=FALSE-------------------------------------------
require(Hmisc)
pkg <- "knitr"
pkg.version <- installed.packages()[pkg, 'Version']
pkg.date <- installed.packages(fields="Date")[pkg, 'Date']
pkg.info <- paste(pkg, pkg.version, pkg.date)
rev <- system("bzr revno", intern=TRUE)
cpu <- system(paste("cat /proc/cpuinfo | grep 'model name' |",
"head -n 1 | cut -d':' -f2"), intern=TRUE)
ram <- system("cat /proc/meminfo | grep MemTotal: | awk '{print $2}'",
intern=TRUE)
ram <- paste0(round(as.integer(ram)/1e6, 1), "GB")
user <- Sys.getenv("LOGNAME")
node <- Sys.info()[["nodename"]]
user.node <- paste0(user, "@", node)
gcc.version <- system("g++ -v 2>&1 | grep 'gcc version' | cut -d' ' -f1-3",
intern=TRUE)
os <- system("lsb_release -d | cut -d: -f2 | sed 's/^[ \t]*//'", intern=TRUE) 复制代码