lianxi
来源:互联网 发布:制作电子文档软件 编辑:程序博客网 时间:2024/05/22 06:13
#Dodger促销和上座率的预测模型library(car) # 线性回归的包library(lattice) # 绘图软件包# 读入数据,并建立数据框导入dodgers <- read.csv("/Users/lily/Documents/model_predict/Chapter_2/dodgers.csv")print(str(dodgers)) # 查看数据结构'data.frame': 81 obs. of 12 variables:
$ month : Factor w/ 7 levels "APR","AUG","JUL",..: 1 1 1 1 1 1 1 1 1 1 ...
$ day : int 10 11 12 13 14 15 23 24 25 27 ...
$ attend : int 56000 29729 28328 31601 46549 38359 26376 44014 26345 44807 ...
$ day_of_week: Factor w/ 7 levels "Friday","Monday",..: 6 7 5 1 3 4 2 6 7 1 ...
$ opponent : Factor w/ 17 levels "Angels","Astros",..: 13 13 13 11 11 11 3 3 3 10 ...
$ temp : int 67 58 57 54 57 65 60 63 64 66 ...
$ skies : Factor w/ 2 levels "Clear ","Cloudy": 1 2 2 2 2 1 2 2 2 1 ...
$ day_night : Factor w/ 2 levels "Day","Night": 1 2 2 2 2 1 2 2 2 2 ...
$ cap : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
$ shirt : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
$ fireworks : Factor w/ 2 levels "NO","YES": 1 1 1 2 1 1 1 1 1 2 ...
$ bobblehead : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
NULL
# 定义一周七天的次序变量,以便绘图和数据小结时用dodgers$ordered_day_of_week <- with(data = dodgers, ifelse((day_of_week == 'Monday'), 1, ifelse((day_of_week == 'Tuesday'), 2, ifelse((day_of_week == 'Wednesday'), 3, ifelse((day_of_week == 'Thursday'), 4, ifelse((day_of_week == 'Friday'), 5, ifelse((day_of_week == 'Saturday'), 6, 7)))))))dodgers$ordered_day_of_week <- factor(dodgers$ordered_day_of_week, levels = 1:7, labels = c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun"))#以标准绘图观测数据分析,一周七天的观众人数par(family='STKaiti')with(data = dodgers, plot(ordered_day_of_week, attend/1000,xlab = "一周内的每一天", ylab = "出席率(单位:1000)", col = "violet", las = 1))#当dodger采用摇头娃娃促销时with(dodgers, table(bobblehead, ordered_day_of_week)) # 星期二的摇头娃娃促销ordered_day_of_week
bobblehead Mon Tue Wed Thur Fri Sat Sun
NO 12 7 12 3 13 11 12
YES 0 6 0 2 0 2 1
#定义月次序变量,绘制数据小结时用dodgers$ordered_month <- with(data = dodgers, ifelse((month == 'APR'), 4, ifelse((month == 'MAY'), 5, ifelse((month == 'JUN'), 6, ifelse((month == 'JUL'), 7, ifelse((month == 'AUG'), 8, ifelse((month == 'SEP'), 9, 10)))))))dodgers$ordered_month <- factor(dodgers$ordered_month, levels = 4:10, labels = c("April", "MAY", "June", "July", "Aug", "Sept", "Oct"))#每个月的观众人数par(family = 'STKaiti')with(dodgers, plot(ordered_month, attend/1000, xlab = "月份", ylab = "出席率(单位:1000人)", col = "light blue", las = 1))
#使用更多的变量来观测数据分析,观察观众人数与日场/夜场#天空是否晴朗、气温,或者是否燃放烟火的关系group.labels <- c("No Fireworks", "Fireworks")group.symbols <- c(21, 24)group.colors <- c("black", "black")group.fill <- c("black", "red")xyplot(attend/1000 ~ temp | skies + day_night, data = dodgers, groups = fireworks, pch = group.symbols, aspect = 0.5, cex = 1.5, col = group.colors, fill = group.fill, layout = c(2, 2), type = c("p", "g"), strip = strip.custom(strip.levels = TRUE, strip.names = FALSE, style = 1), xlab = "Temperature (Degrees Fahrenheit)", ylab = "Attendance(thousands)", key = list(space = "top", text = list(rev(group.labels), col = rev(group.colors)), points = list(pch = rev(group.symbols), col = rev(group.colors), fill = rev(group.fill))))# 观众人数与比赛对手,日场、夜场的关系group.labels <- c("Day", "Night")group.symbols <- c(1, 20)group.symbols.size <- c(2, 2.75)bwplot(opponent ~ attend/1000, data = dodgers, groups = day_night, xlab = "Attend(thousands)", aspect = 2, panel = function(x, y, groups, subscripts,...) { panel.grid(h = (length(levels(dodgers$opponent)) -1), v = -1) panel.stripplot(x, y, groups = groups, subscripts = subscripts, cex = group.symbols.size, pch = group.symbols, col = "darkblue") }, key = list(space = "top", text = list(group.labels, col = "black"), points = list(pch = group.symbols, cex = group.symbols.size, col = "darkblue")) )#采用训练并测试的方案set.seed(1234)training_test <- c(rep(1, length = trunc((2/3) * nrow(dodgers))),rep(2, length = (nrow(dodgers) - trunc((2/3) * nrow(dodgers)))))dodgers$training_test <- sample(training_test) # 随机排列dodgers$training_test <- factor(dodgers$training_test, levels = c(1, 2), labels = c("TRAIN", "TEST"))dodgers.train <- subset(dodgers, training_test == 'TRAIN')print(str(dodgers.train)) # 查看数据框属性dodgers.test <- subset(dodgers, training_test == 'TEST')print(str(dodgers.test))
阅读全文
0 0
- lianxi
- lianxi
- lianxi
- lianxi
- lianxi
- lianxi
- Week 7 lianxi
- wikioi线段树lianxi
- week 10 lianxi
- week 11 lianxi
- week 14 lianxi
- html语法lianxi
- html格式标记lianxi
- JS数据类型lianxi
- C++ Primer plus学习笔记 第三章编程lianxi
- java IO的概念和分类
- MAven学习 — <dependencies> && <dependencyManagement>
- Yolo 9000的darknet实现
- 【剑指offer】面试题3:数组中重复的数字
- Maven学习 — <build> 中的 <plugins>元素的作用
- lianxi
- 将文件内容全部倒置,并写入文件
- 链表的头插法
- 01 目录结构、基础命令的讲解、管道和重定向(讲解邮件的发送)
- 对Java中classpath、path和Java_HOME的理解
- 线程执行完,死亡后,还能再次执行start吗?
- 【树的倍增】【二分 or LCA】USACO MAR11银组 聚会地点
- 部分概念对比
- 算法学习之旅,初级篇(7)--数字塔