数据挖掘之关联规则挖掘之SETM算法实现

来源:互联网 发布:欧姆龙plc网络通信 编辑:程序博客网 时间:2024/04/20 16:02

具体算法实现细节请查看IEEE论文:Set-oriented mining for association rules in relational databases

SETM算法代码+测试用例+具体操作步骤下载戳这http://download.csdn.net/detail/michealtx/4266085

sql语言实现,找出来所有的频繁模式。代码如下:


---------------最终版--------------drop table c1,c2,c3,c4,r1,r2,r3,r4-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------为SETMAlgorithm数据库创建空表r1create table R1(tx_id int,item1 nvarchar(50))--把setmbackup数据库的sales_data表中导入数据到r1中insert into R1select *from SETMBackup.dbo.SALES_DATA--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------setm算法过程-----------------将R1按item1排序select * into #temp from R1--把R1内容存到临时表temp中delete from R1--删除R1insert R1 select * from #temp order by item1--将临时表temp按item1升序排序后插入到R1drop table #temp--删除临时表temp--声明最小支持度MinSupportdeclare @minimum_support intset @minimum_support=3declare @MinSupport varchar(10)set @MinSupport=cast(@minimum_support as varchar(10))--声明变量kdeclare @k intset @k=1--由R1生成C1declare @CK varchar(100)set @CK='C'+cast(@k as varchar(100)) declare @CreateTable varchar(100) set @CreateTable=' create table '+@CK+' (item1 nvarchar(10),cnt int) ' declare @RK varchar(10)set @RK='R'+cast(@k as varchar(100)) declare @InsertInto varchar(100)set @InsertInto=' insert into '+@CK+' select item1,COUNT(*) from '+@RK+' group by item1 having COUNT(*)>='+@MinSupportexec(@CreateTable)exec(@InsertInto)print '由R1生成C1'--进入循环declare @RKNotEmpty varchar(100)--set @RKNotEmpty='if not exists(select * from '+@RK+') begin drop table '+@RK+' break end'declare @KMinusOne int set @KMinusOne=1declare @RKMinusOne varchar(10)set @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) while 1=1beginprint '进入循环'set @KMinusOne=@kset @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) set @k=@k+1set @RK='R'+cast(@k as varchar(100)) ---------------------------------------------------------------------将R[k-1]按全部列排序---------------declare @str1 varchar(MAX)set @str1='declare @max int select @max=count(name) from syscolumns where id=object_id('''+@RKMinusOne+''') --print @max declare @min int set @min=1 declare @tt varchar(max) set @tt='''' while @min<=@max begin declare @name varchar(MAX) select @name=name from( select row_number()over(order by getdate()) as num, name from syscolumns where id=object_id('''+@RKMinusOne+'''))a where num=@min set @tt=ISNULL(@tt,'','')+@name+'','' set @min=@min+1 end --print @tt declare @ss varchar(max)set @ss='''' set  @ss=''select * into #temp from '+@RKMinusOne+' order by ''+LEFT(@tt,LEN(@tt)-1)+'' delete from '+@RKMinusOne+' insert '+@RKMinusOne+' select * from #temp drop table #temp''exec(@ss)'exec(@str1)print '将R[k-1]按全部列排序完成'    --扫描R[k-1]和R1将符合条件者合并生成RKTemp(即R'[K])declare @num intset @num=1declare @str varchar(max)set @str='create table RKTemp('declare @sql varchar(1000)set @sql='tx_id int,'while @num<=@kbeginset @sql=ISNULL(@sql,',')+'item'+CAST(@num as varchar)+' nvarchar(50),'set @num=@num+1endset @str=@str+left(@sql,LEN(@sql)-1)+')'exec(@str)declare @str2 varchar(max)set @str2='insert into rktemp select '+@RKMinusOne+'.*,myr1.item1 from '+@RKMinusOne+', R1 myr1where '+@RKMinusOne+'.tx_id=myr1.tx_id and myr1.item1>'+@RKMinusOne+'.item'+cast(@KMinusOne as varchar(10))exec(@str2)print '扫描R[k-1]和R1将符合条件者合并生成Rktemp完成'---------------将RKTemp(即R'[K])按全部item列排序--------------------set @str1='declare @max int select @max=count(name) from syscolumns where id=object_id('''+'RKTemp'+''') --print @max declare @min int set @min=2 declare @tt varchar(max) set @tt='''' while @min<=@max begin declare @name varchar(MAX) select @name=name from( select row_number()over(order by getdate()) as num, name from syscolumns where id=object_id('''+'RKTemp'+'''))a where num=@min set @tt=ISNULL(@tt,'','')+@name+'','' set @min=@min+1 end --print @tt declare @ss varchar(max)set @ss='''' set  @ss=''select * into #temp from '+'RKTemp'+' order by ''+LEFT(@tt,LEN(@tt)-1)+'' delete from '+'RKTemp'+' insert '+'RKTemp'+' select * from #temp drop table #temp''exec(@ss)'exec(@str1)print '将RKTemp按全部item列排序完成'-------------------用RKTemp(即R'[k])计算支持度生成C[K]-------------------declare @number intset @number=1set @CK='C'+cast(@k as varchar(100)) declare @string varchar(max)set @string='create table '+@CK+'('declare @sqlstring varchar(1000)set @sqlstring=''while @number<=@kbeginset @sqlstring=ISNULL(@sqlstring,',')+'item'+CAST(@number as varchar)+' nvarchar(50),'set @number=@number+1endset @sqlstring=@sqlstring+'cnt int,'--print left(@sqlstring,LEN(@sqlstring)-1)set @string=@string+left(@sqlstring,LEN(@sqlstring)-1)+')'exec(@string)declare @group varchar(max)set @group=''set @number=1while @number<=@kbeginset @group=ISNULL(@group,',')+'item'+CAST(@number as varchar)+','set @number=@number+1endset @group=left(@group,LEN(@group)-1)--print '@group='+@groupdeclare @string2 varchar(max)set @string2='insert into '+@CK+' select '+@group+',count(*) from RKTemp group by '+@group+' having count(*)>='+@MinSupport--print '@string2='+@string2exec(@string2)print '用RKTemp计算支持度生成C[K]完成'-----------------用RKTemp和C[K]生成R[K]--------------------------declare @numbert intset @numbert=1set @RK='R'+cast(@k as varchar(100)) declare @stringt varchar(max)set @stringt='create table '+@RK+'('declare @sqlstringt varchar(1000)set @sqlstringt='tx_id int,'while @numbert<=@kbeginset @sqlstringt=ISNULL(@sqlstringt,',')+'item'+CAST(@numbert as varchar)+' nvarchar(50),'set @numbert=@numbert+1end--print left(@sqlstringt,LEN(@sqlstringt)-1)set @stringt=@stringt+left(@sqlstringt,LEN(@sqlstringt)-1)+')'exec(@stringt)declare @string3 varchar(max)set @string3='insert into '+@RK+' select RKTemp.* from RKTemp,'+@CK+'where RKTemp.item1='+@CK+'.item1 and  RKTemp.item'+cast(@k as varchar(10))+'='+@CK+'.item'+cast(@k as varchar(10))--print '@string3='+@string3exec(@string3)print '用RKTemp和C[K]生成R[K]完成'-------------------R'[k]用完要删除----------------drop table RKTemp----------------------@RKNotEmpty重新赋值,若RK为空,则退出循环-------------set @RKNotEmpty='select tx_id into panduan from '+@RKexec(@RKNotEmpty)if not exists(select * from panduan)begindrop table panduanprint @RK+'为空,跳出循环啦!'breakendelsedrop table panduanend----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


原创粉丝点击