Apriori算法的R语言实现
来源:互联网 发布:windows文件夹加密软件 编辑:程序博客网 时间:2024/05/18 00:08
1)数据准备
library(arules)
# Binary incidence matrix: one row per basket, one column per item;
# a 1 means the basket contains the item. Written as a single row-major
# literal so it can be pasted into a script (no console prompts).
a <- matrix(
  c(
    1, 0, 1, 0,
    0, 0, 1, 1,
    1, 1, 1, 1,
    1, 1, 0, 0,
    0, 0, 1, 0,
    1, 0, 1, 1,
    0, 1, 1, 1
  ),
  nrow = 7, ncol = 4, byrow = TRUE,
  dimnames = list(paste0("basket", 1:7), paste0("item", 1:4))
)
# Coerce the 0/1 incidence matrix to the "transactions" class
# (the coercion method is supplied by the arules package loaded above).
a.class <- methods::as(a, Class = "transactions")
2)使用apriori函数进行关联分析
# Mine association rules with minimum support 0.2 and minimum confidence 0.6,
# then print every rule together with its quality measures.
apriori_params <- list(support = 0.2, confidence = 0.6, target = "rules")
rules <- apriori(a.class, parameter = apriori_params)
inspect(rules)
lhs rhs support confidence lift
1 {} => {item3} 0.8571429 0.8571429 1.0000000
2 {item2} => {item1} 0.2857143 0.6666667 1.1666667
3 {item2} => {item4} 0.2857143 0.6666667 1.1666667
4 {item2} => {item3} 0.2857143 0.6666667 0.7777778
5 {item1} => {item3} 0.4285714 0.7500000 0.8750000
6 {item4} => {item3} 0.5714286 1.0000000 1.1666667
7 {item3} => {item4} 0.5714286 0.6666667 1.1666667
8 {item2,item4} => {item3} 0.2857143 1.0000000 1.1666667
9 {item2,item3} => {item4} 0.2857143 1.0000000 1.7500000
10 {item1,item4} => {item3} 0.2857143 1.0000000 1.1666667
11 {item1,item3} => {item4} 0.2857143 0.6666667 1.1666667
# Same thresholds as before, but constrain where items may appear:
# item3 is the only item allowed on the right-hand side, and every other
# item defaults to the left-hand side.
rules <- apriori(
  a.class,
  parameter = list(support = 0.2, confidence = 0.6, target = "rules"),
  appearance = list(rhs = "item3", default = "lhs")
)
inspect(rules)
lhs rhs support confidence lift
1 {} => {item3} 0.8571429 0.8571429 1.0000000
2 {item2} => {item3} 0.2857143 0.6666667 0.7777778
3 {item1} => {item3} 0.4285714 0.7500000 0.8750000
4 {item4} => {item3} 0.5714286 1.0000000 1.1666667
5 {item2,item4} => {item3} 0.2857143 1.0000000 1.1666667
6 {item1,item4} => {item3} 0.2857143 1.0000000 1.1666667
# Keep only the rules whose lift exceeds 1. which() drops any NA lift
# instead of passing NA into the subscript.
rules <- rules[which(quality(rules)$lift > 1)]

# Order the remaining rules by lift and export them in that order.
# The sorted object is what gets written; previously it was computed
# but the unsorted rules were exported.
rules.sorted <- sort(rules, by = "lift")
write(rules.sorted, file = "apriori_rules.txt", sep = "|", col.names = NA)
序列关联分析——可以挖掘带有时间先后顺序的关联(注意:先后顺序并不等同于因果关系)
library(arulesSequences)
# Toy event log: one row per (sequence, event, item) observation.
item <- factor(c(
  "A", "B", "B", "A", "B", "A", "C", "A", "B",
  "C", "B", "A", "B", "A", "A", "B", "A", "B"
))
seqid <- c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4)
eventid <- c(10, 10, 20, 30, 30, 20, 20, 30, 30, 30, 50, 10, 30, 40, 30, 30, 40, 50)
stopifnot(length(item) == length(seqid), length(item) == length(eventid))

# The data frame must exist before it is coerced; without this line `data`
# would resolve to the utils::data() function and the coercion would fail.
data <- data.frame(item = item)
data.tran <- as(data, "transactions")

# Attach the sequence and event identifiers that cspade reads from
# transactionInfo.
# NOTE(review): some (seqid, eventid) pairs occur more than once (e.g.
# sequence 1, event 10 carries both A and B as separate rows) -- confirm that
# this one-item-per-row layout is accepted by the installed cspade version.
transactionInfo(data.tran)$sequenceID <- seqid
transactionInfo(data.tran)$eventID <- eventid
transactionInfo(data.tran)

# Mine sequences that occur in at least half of the sequences, then list them
# from most to least supported.
result <- cspade(data.tran, parameter = list(support = 0.5), control = list(verbose = TRUE))
result <- sort(result, by = "support")
inspect(result)
实例:用户点击页面的行为分析
# Read the raw log, keeping each line whole as one 60-character fixed-width
# field (column V1).
root <- "C:/"
tmpp <- read.fwf(paste0(root, "anonymous-msweb.data"), widths = c(60))
train_list <- tmpp$V1
train_length <- length(train_list)

# Parsing starts at line 302, as in the original script.
# NOTE(review): presumably the earlier lines are header/attribute records --
# confirm against the data file.
first_record <- 302
if (train_length < first_record) {
  stop(
    "expected at least ", first_record, " lines in the input, got ",
    train_length,
    call. = FALSE
  )
}

# Split every record on commas; the first field is the record type.
records <- strsplit(as.character(train_list[first_record:train_length]), ",")
record_type <- vapply(records, function(fields) fields[1], character(1))

# A "C" record opens a new sequence; each following "V" record is one page
# visit inside that sequence.
is_case <- !is.na(record_type) & record_type == "C"
sequence_of_record <- cumsum(is_case)
# Visits that appear before the first "C" record belong to no sequence and
# are skipped.
is_visit <- !is.na(record_type) & record_type == "V" & sequence_of_record > 0

tmp_sequenceid <- sequence_of_record[is_visit]
# Event id = running position of the visit inside its sequence. Sequence ids
# are non-decreasing, so match() returns the index of each sequence's first
# visit.
tmp_eventid <- seq_along(tmp_sequenceid) -
  match(tmp_sequenceid, tmp_sequenceid) + 1
# The second field of a "V" record is the numeric page id.
tmp_page <- factor(as.numeric(
  vapply(records[is_visit], function(fields) fields[2], character(1))
))

data <- data.frame(page = tmp_page, seqid = tmp_sequenceid, eventid = tmp_eventid)

# Keep every visit of every sequence that reached the key page at least once.
# Rows stay in their original order, i.e. grouped by sequence.
user.page <- 1034
user.sequenceid <- unique(data$seqid[which(data$page == user.page)])
data.user <- data[data$seqid %in% user.sequenceid, ]
# Data preparation: turn the selected visits into transactions and attach
# the sequence / event identifiers.
library(arulesSequences)
data.tran <- as(data.frame(page = data.user$page), "transactions")
transactionInfo(data.tran)$sequenceID <- data.user$seqid
transactionInfo(data.tran)$eventID <- data.user$eventid

# Analysis: mine sequences of length at most 2 with no support threshold,
# ordered by support.
cspade_params <- list(support = 0, maxlen = 2)
result <- cspade(data.tran, parameter = cspade_params, control = list(verbose = TRUE))
result <- sort(result, by = "support")

# Keep only the sequences whose final element contains the key page.
page.2 <- paste0(".*page=", user.page, "[^\\}]*\\}>")
sequence_text <- as(result, "data.frame")$sequence
result.2 <- result[grep(page.2, sequence_text)]
inspect(result.2)
# Select the key lead-in pages and measure how strongly each one guides
# users to the key page.

# Drop the first entry of result.2 before converting.
# NOTE(review): presumably this is the key page on its own (result.2 is
# expected to be sorted by support, highest first) -- confirm.
result.data.frame <- as(result.2[-1], "data.frame")

# Share of the total support contributed by each sequence, and its running sum.
persent <- result.data.frame$support / sum(result.data.frame$support)
sum.persent <- cumsum(persent)
result.data.frame <- cbind(result.data.frame, persent, sum.persent)

# Keep the leading sequences that together cover at most 70% of the support.
max.persent <- 0.7
result.data.frame <- result.data.frame[
  which(result.data.frame$sum.persent <= max.persent), ,
  drop = FALSE
]
result.data.frame

# Extract the first page of every sequence: the text between the first
# "<{page=" (7 characters long) and the first "}".
sequence_text <- as.character(result.data.frame$sequence)
page <- substr(
  sequence_text,
  regexpr("<\\{page=", sequence_text) + 7,
  regexpr("\\}", sequence_text) - 1
)

# Number of distinct sequences in the full data that visited each of those
# pages.
uv <- vapply(
  page,
  function(one_page) length(unique(data$seqid[which(data$page == one_page)])),
  integer(1),
  USE.NAMES = FALSE
)

# support * number of mined sequences = count of sequences going from the page
# to the key page; dividing by the page's visitor count gives the confidence.
conf <- result.data.frame$support * result@info$nsequences / uv
result.data.frame <- cbind(result.data.frame, conf = conf, page = page)
# Plot the result: bars show each page's share of support, the red line and
# the labels show its confidence.
# A one-column matrix with beside = TRUE draws the bars at x = 1.5, 2.5, ...
# (an extra nrow argument to as.matrix() is ignored, so it is omitted).
bar_x <- 0.5 + 1:nrow(result.data.frame)
conf_label <- paste0(round(result.data.frame$conf * 100, 2), "%")

barplot(
  as.matrix(result.data.frame$persent),
  ylim = c(0, 1),
  beside = TRUE,
  xlab = "页面名称",
  main = "引导用户进入关键页面1034的重点页面分析"
)
lines(bar_x, result.data.frame$conf, type = "b", col = "red")
text(bar_x, result.data.frame$conf, labels = conf_label)
axis(1, at = bar_x, labels = result.data.frame$page, tick = FALSE)
- Apriori算法的R语言实现
- 关联规则R语言实战(Apriori算法)
- Apriori算法的实现
- 机器学习算法(一)——关联规则Apriori算法及R语言实现方法
- 【R的机器学习】Apriori算法
- Apriori算法的Python实现!
- Apriori算法的实现二
- Apriori算法的实现三
- Apriori 算法 java 的实现
- Apriori算法的C++实现
- Apriori算法的Python实现
- Apriori算法的python实现
- Apriori算法的java实现
- Apriori算法的JAVA实现
- Apriori算法的Java实现
- Apriori算法的python实现
- Apriori算法的C++实现
- Apriori算法(c++/c#语言实现)
- 802.11n的HTC-MCS速率
- 给UIview加上xib
- 转一个 Xcode 7 缺少 *.dylib库的解决方法
- 密码学中的“盐值 Salt”
- 关于fseek不能定位大于2G文件的问题
- Apriori算法的R语言实现
- z-fighting
- Java中的位移操作 >>、<<
- android studio import android-source-code
- linux 基本命令总结
- 基于Triplet loss 函数训练人脸识别深度网络
- HDbaseT 高清传输更简单——只需一根网线
- Linux流编程
- RabbitMQ 官方说明文档