用R中prophet包做时序预测

来源:互联网 发布:汉族不平等知乎 编辑:程序博客网 时间:2024/05/21 06:53
      最近又接到一个预测项目需求,主要是预测每天投资用户会投资不同产品多少金额,属于每天即时预测,需要拿最近一年的数据做测试集,来预测每天不同期限产品分别被投资多少金额,然后通过这些金额每天找借款端匹配借款需求,借款端运营通过资金端需求来动态调整营销活动,通过资金需求多少来有效运营借款需求,形成与资金端的良性互动,节约闲置资金成本。

      就这样一个需求,如何实现?我首先想到通过业务经验的方法来实现,需要从N多产品中抽丝剥茧,找到产品、推广,促销,节日之间的关系,最终确定一个期限的产品当天成交预测值;但是从成交数据来看,这些数据都是每天在变化的,不能明显看出两者之间有明显的相关关系,用这些现有的数据关系能预测将来的成交吗?

      既然是预测问题,那我首先想到的是时序预测,那么问题就来了,如何程序化这些现有的数据关系?

      在网上找了一些相关模型,并没有模型来实现我所说的推广,促销,节日,加息,降息等业务时点与预测数据的关系,仔细查找,首先(注意还有后续,我会在下篇博文做出解释)发现了一个名叫prophet的时序预测包,里面可以用不同类型的holiday值来对模型进行业务时点的匹配和修正,非常适合目前的项目需求。不管三七二十一,先安装了再说。

      安装了之后需要初始化数据,初始化节假日,初始化模型,调整参数,衡量结果,具体代码如下:

library(prophet)
library(dplyr)
#初始化数据
all<-read.csv('d:/Rdata/zjd/ts/all.csv',na.string='NA',header=T)
alln<-read.csv('d:/Rdata/zjd/ts/alln.csv',na.string='NA',header=T)
qb30<-read.csv('d:/Rdata/zjd/ts/qb30.csv',na.string='NA',header=T)
qb45<-read.csv('d:/Rdata/zjd/ts/qb45.csv',na.string='NA',header=T)
qb60<-read.csv('d:/Rdata/zjd/ts/qb60.csv',na.string='NA',header=T)
qb90<-read.csv('d:/Rdata/zjd/ts/qb90.csv',na.string='NA',header=T)
qb180<-read.csv('d:/Rdata/zjd/ts/qb180.csv',na.string='NA',header=T)
qb270<-read.csv('d:/Rdata/zjd/ts/qb270.csv',na.string='NA',header=T)
qb365<-read.csv('d:/Rdata/zjd/ts/qb365.csv',na.string='NA',header=T)
qb730<-read.csv('d:/Rdata/zjd/ts/qb730.csv',na.string='NA',header=T)
qb1095<-read.csv('d:/Rdata/zjd/ts/qb1095.csv',na.string='NA',header=T)
qb1095m<-read.csv('d:/Rdata/zjd/ts/qb1095+.csv',na.string='NA',header=T)
qbother<-read.csv('d:/Rdata/zjd/ts/qbother.csv',na.string='NA',header=T)
#holiday<-read.csv('d:/Rdata/zjd/ts/holidays.csv',na.string='NA',header=T)
historyalln <- data.frame(ds = seq(as.Date('2017-01-01'), as.Date('2017-09-17'), by = 'd'), y = alln$yn)
historyalln <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yn)
historyallo <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yo)
history30n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yn)
plot(history30n$ds,history30n$y)
history45n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yn)
history60n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yn)
history90n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yn)
history180n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yn)
history270n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yn)
history365n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yn)
history730n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yn)
history1095n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yn)
history1095m <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yn)
historyothern <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yn)
history30o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yo)
history45o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yo)
history60o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yo)
history90o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yo)
history180o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yo)
history270o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yo)
history365o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yo)
history730o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yo)
history1095o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yo)
history1095p <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yo)
historyothero <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yo)
#初始化节假日
holiday <- data_frame(
  holiday = 'holiday',
  ds = as.Date(c(  '2017-01-01','2017-01-27',  '2017-04-02','2017-04-29',
                   '2017-05-28')),
  lower_window = 0,
  upper_window = 2
)
payday <- data_frame(
  holiday = 'payday',
  ds = as.Date(c(  '2016-01-26',
    '2016-02-26','2016-03-26',  '2016-04-26','2016-05-26',
    '2016-06-26','2016-07-26',  '2016-08-26','2016-09-26',
    '2016-10-26','2016-11-26',  '2016-12-26','2017-01-26',
    '2017-02-26','2017-03-26',  '2017-04-26','2017-05-26',
    '2017-06-26','2017-07-26',  '2017-08-26','2017-09-26')),
  lower_window = 0,
  upper_window = 1
)
VIP <- data_frame(
  holiday = 'VIP',
  ds = as.Date(c(
    '2016-06-09','2016-07-09',  '2016-08-09','2016-09-09',
    '2016-10-09','2016-11-09',  '2016-12-09','2017-01-09',
    '2017-02-09','2017-03-09',  '2017-04-09','2017-05-09',
    '2017-06-09', '2017-07-09', '2017-08-09',  '2017-09-09')),
  lower_window = 0,
  upper_window = 1
)
raiserate <- data_frame(
  holiday = 'raiserate',
  ds = as.Date(c( '2017-05-08','2017-06-19',  '2017-07-24')),
  lower_window = 0,
  upper_window = 7
)
reducerate <- data_frame(
  holiday = 'reducerate',
  ds = as.Date(c('2016-06-20','2017-02-22', '2017-03-27', '2017-04-05')),
  lower_window = 0,
  upper_window = 7
)
newcustomer <- data_frame(
  holiday = 'newcustomer',
  ds = as.Date(c('2016-01-01','2016-01-15','2016-07-18','2016-08-24',
                 '2016-09-07','2016-12-09','2016-12-22','2017-01-06','2017-04-12')),
  lower_window = 0,
  upper_window = 7
)
promotion <- data_frame(
  holiday = 'promotion',
  ds = as.Date(c('2016-07-07','2016-11-03','2017-01-10', '2017-01-17', '2017-02-10')),
  lower_window = 0,
  upper_window = 7
)
holidays<-bind_rows(holiday,payday,VIP, raiserate,reducerate,newcustomer,promotion)
#初始化模型
nall <- prophet(historyalln,
                holidays = holidays,
                seasonality.prior.scale=5,
                changepoint.prior.scale=0.9,
                holidays.prior.scale = 20,
                mcmc.samples = 500,
                interval.width=0.9,
                uncertainty.samples = 30)
oall <- prophet(historyallo,
                holidays = holidays,
                seasonality.prior.scale=5,
                changepoint.prior.scale=0.1,
                holidays.prior.scale = 1,
                mcmc.samples = 100,
                interval.width=0.9,
                uncertainty.samples = 100)
n30 <- prophet(history30n,
               holidays = holidays,
               seasonality.prior.scale=5,
               changepoint.prior.scale=0.1,
               holidays.prior.scale = 1,
               mcmc.samples = 100,
               interval.width=0.9,
               uncertainty.samples = 100)
n45 <- prophet(history45n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n60 <- prophet(history60n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n90 <- prophet(history90n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n180 <- prophet(history180n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n270 <- prophet(history270n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n365 <- prophet(history365n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n730 <- prophet(history730n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n1095 <- prophet(history1095n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n1095m <- prophet(history1095m,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
nother <- prophet(historyothern,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o30 <- prophet(history30o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o45 <- prophet(history45o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o60 <- prophet(history60o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o90 <- prophet(history90o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o180 <- prophet(history180o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o270 <- prophet(history270o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o365 <- prophet(history365o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o730 <- prophet(history730o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o1095 <- prophet(history1095o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o1095p <- prophet(history1095p,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
oother <- prophet(historyothero,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
#设定预测时间
futurealln <- make_future_dataframe(nall, periods = 7)
futureallo <- make_future_dataframe(oall, periods = 7)
future30n <- make_future_dataframe(n30, periods = 7)
future45n <- make_future_dataframe(n45, periods = 7)
future60n <- make_future_dataframe(n60, periods = 7)
future90n <- make_future_dataframe(n90, periods = 7)
future180n <- make_future_dataframe(n180, periods = 7)
future270n <- make_future_dataframe(n270, periods = 7)
future365n <- make_future_dataframe(n365, periods = 7)
future730n <- make_future_dataframe(n730, periods = 7)
future1095n <- make_future_dataframe(n1095, periods = 7)
future1095m <- make_future_dataframe(n1095m, periods = 7)
futureothern <- make_future_dataframe(nother, periods = 7)
future30o <- make_future_dataframe(o30, periods = 7)
future45o <- make_future_dataframe(o45, periods = 7)
future60o <- make_future_dataframe(o60, periods = 7)
future90o <- make_future_dataframe(o90, periods = 7)
future180o <- make_future_dataframe(o180, periods = 7)
future270o <- make_future_dataframe(o270, periods = 7)
future365o <- make_future_dataframe(o365, periods = 7)
future730o <- make_future_dataframe(o730, periods = 7)
future1095o <- make_future_dataframe(o1095, periods = 7)
future1095p <- make_future_dataframe(o1095p, periods = 7)
futureothero <- make_future_dataframe(oother, periods = 7)
tail(future30n)
#预测
forecastalln <- predict(nall, futurealln)
forecastallo <- predict(oall, futureallo)
forecast30n <- predict(n30, future30n)
forecast45n <- predict(n45, future45n)
forecast60n <- predict(n60, future60n)
forecast90n <- predict(n90, future90n)
forecast180n <- predict(n180, future180n)
forecast270n <- predict(n270, future270n)
forecast365n <- predict(n365, future365n)
forecast730n <- predict(n730, future730n)
forecast1095n <- predict(n1095, future1095n)
forecast1095m <- predict(n1095m, future1095m)
forecastothern <- predict(nother, futureothern)
forecast30o <- predict(o30, future30o)
forecast45o <- predict(o45, future45o)
forecast60o <- predict(o60, future60o)
forecast90o <- predict(o90, future90o)
forecast180o <- predict(o180, future180o)
forecast270o <- predict(o270, future270o)
forecast365o <- predict(o365, future365o)
forecast730o <- predict(o730, future730o)
forecast1095o <- predict(o1095, future1095o)
forecast1095p <- predict(o1095p, future1095p)
forecastothero <- predict(oother, futureothero)
# tail(forecast90n[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')])
forecastalln %>%
  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%
  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%
  tail(10)
forecastallo %>%
  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%
  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%
  tail(10)
#直线预测
plot(nall, forecastalln)
plot(n30, forecast30n)
plot(oall, forecastallo)
#趋势分解
prophet_plot_components(nall, forecastalln)
prophet_plot_components(oall, forecastallo)
a<-cbind(futurealln[1],forecastalln$yhat,forecast30n$yhat,forecast45n$yhat,forecast60n$yhat,forecast90n$yhat,forecast180n$yhat,forecast270n$yhat,forecast365n$yhat,forecast730n$yhat,forecast1095n$yhat,forecast1095m$yhat,forecastothern$yhat)
b<-cbind(futureallo[1],forecastallo$yhat,forecast30o$yhat,forecast45o$yhat,forecast60o$yhat,forecast90o$yhat,forecast180o$yhat,forecast270o$yhat,forecast365o$yhat,forecast730o$yhat,forecast1095o$yhat,forecast1095p$yhat,forecastothero$yhat)
write.csv(a,file = "d:/Rdata/zjd/ts/qballn.csv")
write.csv(b,file = "d:/Rdata/zjd/ts/qballo.csv")


目前预测准确率总金额能较为准确的预测,调整好参数后总成交金额差值约为1%左右;不同产品期限成交金额预测会有10%上下的波动。
参考文献:http://blog.csdn.net/sinat_26917383/article/details/57419862
http://blog.csdn.net/justinjia91/article/details/62470076
阅读全文
0 0