# Attach every package this script relies on, installing any that are missing.
# The order matches the original attach order, so name masking is unchanged.
# `grid` ships with R itself, so the availability check always succeeds for it
# and no CRAN install is ever attempted.
required_packages <- c(
  "ggplot2", "reshape2", "plyr", "zoo", "grid", "gridExtra"
)
for (pkg in required_packages) {
  # requireNamespace() checks availability quietly, so no warnings need to be
  # suppressed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() stops with an error if the package still cannot be loaded,
  # whereas require() would only return FALSE and let the script carry on.
  library(pkg, character.only = TRUE)
}
# Load the search-volume report from disk.
# NOTE(review): the path is an absolute path on the author's machine; adjust
# it before running elsewhere.
report <- read.csv(
  "H:/自媒体/2015-07-20/茶与咖啡/report.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Parse the first column into Date values and store them as `week`.
# NOTE(review): later steps drop column 1 and expect `week` to disappear with
# it, so the first CSV column is presumably already named "week" - confirm.
report$week <- as.Date(report[, 1])
饮品就如人的面孔一样千差万别,但是人们经常饮用的饮料无外乎茶、啤酒、咖啡、酒这四大类,这四类饮品已经深深地融入到人类的历史文化中了。
中国是茶的发源地,中国人种茶、饮茶已经有3000年的历史了,茶文化已经融入到中国人的骨髓里,常言道:“开门七件事,柴米油盐酱醋茶”,茶已经从士绅的茶座走向了每一个中国人的心里,也走出了中国;啤酒的销量仅次于水和茶,说起啤酒的历史那应该和酒一样古老,公元前6000年左右巴比伦人用黏土板雕刻的献祭用啤酒制作法是最古老的有关啤酒的文献,这一清凉夏日饮品19世纪才传入中国,但史载我们的先人很早以前也掌握了麦芽酿造技术,即所谓的蘖法酿醴(li),但是汉代以后该方法失传了,取而代之的是酒曲发酵的甜酒;而咖啡应该是最年轻的,但是它却以温柔的一面改变着历史和创造历史的人。
# Compare the four beverages over the four most recent years to see which one
# comes out on top.

# Sum every series per calendar year.
teabar <- cbind(report, year = format(report$week, "%Y"))
teabar <- teabar[, -1] # drop the first column before aggregating
teabar <- aggregate(. ~ year, data = teabar, FUN = sum)
# Keep the last four years; this replaces the hard-coded row range 9:12 and
# gives the same rows whenever the table has twelve yearly rows.
teabar <- tail(teabar, 4)
# Long format: one row per (year, series) pair in `variable` / `value`.
teabar <- melt(teabar, id.vars = "year")

background_fill <- rgb(242, 242, 242, maxColorValue = 255)
grid_colour <- rgb(146, 146, 146, maxColorValue = 255)
title_colour <- rgb(74, 69, 42, maxColorValue = 255)

# Shared look for the chart. The original passed `axis.text.y` twice, which
# current ggplot2 rejects ("formal argument matched by multiple actual
# arguments"); the grey20 / size 12 variant is kept so both axes match.
theme_opts <- theme(
  panel.background = element_rect(fill = background_fill),
  plot.background = element_rect(fill = background_fill),
  panel.grid.major = element_line(colour = grid_colour, size = .75),
  axis.ticks = element_blank(),
  axis.text.x = element_text(colour = "grey20", size = 12),
  axis.text.y = element_text(colour = "grey20", size = 12),
  axis.title.y = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = 1.5
  ),
  axis.title.x = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = -.5
  ),
  legend.position = "none"
)

# One fill colour per series, applied in the order of the melted `variable`
# levels.
# NOTE(review): assumes the CSV columns are ordered tea, coffee, beer, wine -
# confirm against the data.
cols <- c(
  rgb(0, 130, 137, maxColorValue = 255), # tea
  rgb(252, 157, 154, maxColorValue = 255), # coffee
  rgb(253, 117, 107, maxColorValue = 255), # beer
  rgb(200, 200, 169, maxColorValue = 255) # wine
)

# The colours go through a manual fill scale rather than a fixed `fill = cols`
# on the layer: a length-4 constant does not match the 16-row layer data.
# The legend is already hidden by `legend.position = "none"` in theme_opts.
p <- ggplot(teabar, aes(x = variable, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = cols) +
  coord_flip() +
  ylab("") +
  xlab("") +
  theme_opts

# One panel per year, two panels per row.
p + facet_wrap(~year, ncol = 2)
如果搜索量代表着人们对茶、啤酒、咖啡、酒的偏好,那么谁才是人们心中的最佳饮品呢?咖啡近四年来搜索量均排名第一,其次为茶、酒、啤酒。但从趋势上分析,茶和啤酒正变得越来越热,而咖啡经历暴涨之后略显颓势,有趣的是酒自2005年起搜索量一直在下降,直到近两年才进入了平台期。但酒类人家也曾阔过,和酒类相比,2005年之前,其他三类饮料根本不是一个档次。另外,在2007-2010年之间四类饮料的搜索量均跌入了谷底期。
# Next, draw the yearly line chart to look for trends.

# Sum every series per calendar year.
teabar <- cbind(report, year = format(report$week, "%Y"))
teabar <- teabar[, -1] # drop the first column before aggregating
teabar <- aggregate(. ~ year, data = teabar, FUN = sum)
# Turn the year labels into numbers for a continuous x axis. This replaces the
# hard-coded 2004:2015, which failed unless the table had exactly twelve rows.
teabar$year <- as.integer(as.character(teabar$year))

background_fill <- rgb(242, 242, 242, maxColorValue = 255)
grid_colour <- rgb(146, 146, 146, maxColorValue = 255)
title_colour <- rgb(74, 69, 42, maxColorValue = 255)

# Shared look for the chart. The original passed `axis.text.y` twice, which
# current ggplot2 rejects; the grey20 / size 12 variant is kept so both axes
# match.
theme_opts <- theme(
  panel.background = element_rect(fill = background_fill),
  plot.background = element_rect(fill = background_fill),
  panel.grid.major = element_line(colour = grid_colour, size = .75),
  panel.border = element_blank(),
  axis.ticks = element_blank(),
  axis.text.x = element_text(colour = "grey20", size = 12),
  axis.text.y = element_text(colour = "grey20", size = 12),
  axis.title.y = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = 1.5
  ),
  axis.title.x = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = -.5
  ),
  legend.position = "none"
)

# Line / label colour for each series.
tea_colour <- rgb(0, 130, 137, maxColorValue = 255)
coffee_colour <- rgb(252, 157, 154, maxColorValue = 255)
beer_colour <- rgb(253, 117, 107, maxColorValue = 255)
wine_colour <- rgb(200, 200, 169, maxColorValue = 255)

# Build p1: one line per series, each with a text label at a fixed position
# (x = 2012.5, hand-picked y) in the same colour as its line.
p1 <- ggplot(teabar, aes(x = year)) +
  geom_line(aes(y = 茶), color = tea_colour, size = 1.6) +
  annotate(
    "text",
    x = 2012.5, y = 2300, colour = tea_colour,
    label = "茶", size = 6, fontface = "bold"
  ) +
  geom_line(aes(y = 咖啡), color = coffee_colour, size = 1.6) +
  annotate(
    "text",
    x = 2012.5, y = 3300, colour = coffee_colour,
    label = "咖啡", size = 6, fontface = "bold"
  ) +
  geom_line(aes(y = 啤酒), color = beer_colour, size = 1.6) +
  annotate(
    "text",
    x = 2012.5, y = 1800, colour = beer_colour,
    label = "啤酒", size = 6, fontface = "bold"
  ) +
  geom_line(aes(y = 酒), color = wine_colour, size = 1.6) +
  annotate(
    "text",
    x = 2012.5, y = 2800, colour = wine_colour,
    label = "酒", size = 6, fontface = "bold"
  ) +
  theme_bw() +
  # One major break per year present in the data. The single minor break at
  # 2004 is kept from the original; per the author's note it is there to keep
  # vertical minor grid lines out of the chart.
  scale_x_continuous(minor_breaks = 2004, breaks = teabar$year) +
  ggtitle("") +
  ylab("") +
  xlab("") +
  theme_opts

# Display the chart; the original built p1 but never drew it.
p1
从年跨度上分析,我们可能就得到这一点知识,如果就此止步,也许会错过很多偏好的细节。不如把时间的粒度缩小一点,看看什么情况。从季度趋势上分析,每类产品在第四季度和第一季度之间都有一个峰值,而且在时间上更偏向于第四季度,但啤酒成了一个例外,它的搜索峰值在年中,即第二和第三季度之间,个人猜测,原因可能是第四季度和第一季度是为圣诞节、元旦采购的时间,而第三季度则是清凉夏日。如果这些搜索量和人们的消费习惯相关,很难拒绝将广告、折扣等促销活动安排在峰值时期。
# Next, draw one quarterly line chart per beverage.

# Sum every series per calendar quarter. The grouping column keeps the name
# `year` from the earlier steps even though it now holds year-quarters.
teabar <- report
teabar$year <- as.yearqtr(teabar$week)
teabar <- teabar[, -1] # drop the first column before aggregating
teabar <- aggregate(. ~ year, data = teabar, FUN = sum)
# Drop the last quarter.
# NOTE(review): presumably because it is incomplete - confirm.
teabar <- head(teabar, -1)

# Axis labels such as "2004Q1", taken from the data before `year` is recoded.
quarter_labels <- gsub(" ", "", as.character(teabar$year), fixed = TRUE)
# Recode "2004 Q1" as the number 200401, as the original did.
teabar$year <- as.numeric(
  gsub(" Q", "0", as.character(teabar$year), fixed = TRUE)
)

# Running quarter index used as the x axis (was hard-coded as 1:44).
quarter <- seq_len(nrow(teabar))
teabar <- cbind(teabar, quarter)
# Every fourth quarter gets a major break and a label (was hard-coded as
# 1, 5, ..., 41 with labels "2004Q1" ... "2014Q1").
quarter_breaks <- seq(1, nrow(teabar), by = 4)

background_fill <- rgb(242, 242, 242, maxColorValue = 255)
grid_colour <- rgb(146, 146, 146, maxColorValue = 255)
title_colour <- rgb(74, 69, 42, maxColorValue = 255)

# Shared look for the charts. The original passed `axis.text.y` twice, which
# current ggplot2 rejects; the grey20 / size 12 variant is kept so both axes
# match.
theme_opts <- theme(
  panel.background = element_rect(fill = background_fill),
  plot.background = element_rect(fill = background_fill),
  panel.grid.major = element_line(colour = grid_colour, size = .75),
  panel.border = element_blank(),
  axis.ticks = element_blank(),
  axis.text.x = element_text(colour = "grey20", size = 12),
  axis.text.y = element_text(colour = "grey20", size = 12),
  axis.title.y = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = 1.5
  ),
  axis.title.x = element_text(
    size = 11, colour = title_colour, face = "bold", vjust = -.5
  ),
  legend.position = "none"
)

# Line / label colour per series, keyed by the series' column name.
cols <- c(
  "茶" = rgb(0, 130, 137, maxColorValue = 255),
  "咖啡" = rgb(252, 157, 154, maxColorValue = 255),
  "啤酒" = rgb(253, 117, 107, maxColorValue = 255),
  "酒" = rgb(200, 200, 169, maxColorValue = 255)
)

# Build the quarterly line chart for one series.
#
# data        data frame with a `quarter` column and one column per series
# series      column name of the series; also used as the in-plot label
# colour      colour of the line and of the label
# label_y     y position of the label (it is always placed at x = 1)
# breaks      positions of the major x breaks
# labels      text shown at those breaks
# show_x_text whether the x axis tick labels are drawn
#
# Returns the ggplot object.
quarter_plot <- function(data, series, colour, label_y,
                         breaks, labels, show_x_text) {
  # Copy the series into a fixed column name so the mapping does not depend
  # on which column is being plotted.
  series_data <- data.frame(quarter = data$quarter, value = data[[series]])
  chart <- ggplot(series_data, aes(x = quarter, y = value)) +
    geom_line(color = colour, size = 1.6) +
    annotate(
      "text",
      x = 1, y = label_y, colour = colour,
      label = series, size = 6, fontface = "bold"
    ) +
    theme_bw() +
    # The single minor break at 0 is kept from the original; per the author's
    # note it is there to keep vertical minor grid lines out of the chart.
    scale_x_continuous(minor_breaks = 0, breaks = breaks, labels = labels) +
    ggtitle("") +
    ylab("") +
    xlab("") +
    theme_opts +
    theme(plot.margin = unit(c(-1, 0.5, 0.5, 0.5), "lines"))
  if (!show_x_text) {
    chart <- chart + theme(axis.text.x = element_blank())
  }
  chart
}

break_labels <- quarter_labels[quarter_breaks]

# Tea and coffee hide their x axis text; beer and wine show it.
p1 <- quarter_plot(
  teabar, "茶", cols[["茶"]], 525,
  quarter_breaks, break_labels,
  show_x_text = FALSE
)
p2 <- quarter_plot(
  teabar, "咖啡", cols[["咖啡"]], 650,
  quarter_breaks, break_labels,
  show_x_text = FALSE
)
p3 <- quarter_plot(
  teabar, "啤酒", cols[["啤酒"]], 500,
  quarter_breaks, break_labels,
  show_x_text = TRUE
)
p4 <- quarter_plot(
  teabar, "酒", cols[["酒"]], 850,
  quarter_breaks, break_labels,
  show_x_text = TRUE
)

# Convert each plot to a gtable so its column widths can be edited.
gp1 <- ggplotGrob(p1)
gp2 <- ggplotGrob(p2)
gp3 <- ggplotGrob(p3)
gp4 <- ggplotGrob(p4)

# Give layout columns 2:3 of every chart the same (largest) width.
# NOTE(review): presumably these are the y axis columns, so that the panels
# line up; the gtable layout differs between ggplot2 versions - confirm that
# 2:3 is still the right range.
maxWidth <- unit.pmax(
  gp1$widths[2:3], gp2$widths[2:3], gp3$widths[2:3], gp4$widths[2:3]
)
gp1$widths[2:3] <- maxWidth
gp2$widths[2:3] <- maxWidth
gp3$widths[2:3] <- maxWidth
gp4$widths[2:3] <- maxWidth

# Draw the four charts together; per the author's note, passing nrow = 4
# would set the arrangement.
grid.arrange(gp1, gp2, gp3, gp4)
#画月折线图
如果将时间粒度再缩小一些,在月度图上可能看到更多的细节,除了年末和年初的一个月会出现峰值以外(啤酒除外),每隔一两个月还会出现一个小峰值,这可能又是人们的一个购物习惯,每次购物间隔约为1到2个月。周折线则将这种习惯定位到月末至月初的一周时间。
更多数据及代码链接:http://pan.baidu.com/s/1hqhaPJe 密码:微信索取
关于我们,关注理性与文艺,用数据创作内容性的精致阅读,这里是数据分析挖掘人员与文艺青年的集结地,不做培训,不做鼓吹,只踏踏实实地做一篇又一篇数据驱动的文章,并设计机器人减轻数据分析的负担,无论你感兴趣还是想参与都可以关注,请加微信公众号大音如霜


雷达卡




京公网安备 11010802022788号







