机器学习实战—FPGrowth算法的实现
来源:互联网 发布:临沂软件开发 编辑:程序博客网 时间:2024/06/05 00:56
本博文主要讲FPGrowth算法的Java实现,话不多说,直接上代码。
1.首先构建FP树所需要的数据结构。
package com.jiang.fpGrowth;import java.util.ArrayList;import java.util.List;import javax.swing.tree.TreeNode;/** *类说明 * @author jiangfeng * @since 2016年1月11日 */public class FP_TreeNode implements Comparable<FP_TreeNode> { private String nodeName;//节点名称 private int count;//计数 private FP_TreeNode parent;//父节点 private List<FP_TreeNode> children; private FP_TreeNode nextNode;//下一个同名节点 public FP_TreeNode(){} public FP_TreeNode(String name){nodeName=name; } public String getNodeName() { return nodeName; } public void setNodeName(String nodeName) { this.nodeName = nodeName; } public int getCount() { return count; } public void setCount(int count) { this.count = count; } public FP_TreeNode getParent() { return parent; } public void setParent(FP_TreeNode parent) { this.parent = parent; } public List<FP_TreeNode> getChildren() { return children; } public void setChildren(List<FP_TreeNode> children) { this.children = children; } public FP_TreeNode getNextNode() { return nextNode; } public void setNextNode(FP_TreeNode nextNode) { this.nextNode = nextNode; } //添加孩子节点 public void addChild(FP_TreeNode child){if(this.getChildren()==null){//孩子节点为空,新建链表,加入孩子节点,再作为该节点的孩子结合 List<FP_TreeNode> list=new ArrayList<>(); list.add(child); this.setChildren(list);}else{ this.getChildren().add(child);} } //查找孩子节点 public FP_TreeNode findChild(String name){List<FP_TreeNode> children=this.getChildren();if(children!=null){ for(FP_TreeNode child:children){if(child.getNodeName().equals(name)){ return child;} }}return null; } //打印孩子节点的名称 public void printChildrenName(){List<FP_TreeNode> children=this.getChildren();if(children!=null){ for(FP_TreeNode child : children){System.out.print(child.getNodeName()+" "); }}else { System.out.println("null");} } public void countCreament(int n){this.count+=n; } //使得Arrays.sort()按照降序排列 @Override public int compareTo(FP_TreeNode node){int count=node.getCount();return count-this.count; } }
2.实现FPGrowth算法
package com.jiang.fpGrowth;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.LinkedList;import java.util.List;import java.util.Map;import java.util.Map.Entry;import javax.swing.tree.TreeNode;import java.util.Set;public class FPGrowth { private int minSupport; public int getMinSupport() { return minSupport; } public void setMinSupport(int minSupport) { this.minSupport = minSupport; } //FPGrowth算法 public void FPGrowth(List<List<String>> transRecords,List<String> postPattern){ArrayList<FP_TreeNode> headerTable=buildHeaderTable(transRecords);FP_TreeNode treeRoot=buildFPTree(transRecords, headerTable);if(treeRoot.getChildren()==null||treeRoot.getChildren().size()==0){ return;}if(postPattern!=null){ for(FP_TreeNode header:headerTable){System.out.print(header.getCount()+"\t"+header.getNodeName());for(String ele:postPattern){ System.out.print("\t"+ele);}System.out.println(); }}for(FP_TreeNode header:headerTable){ List<String> newPostPattern =new LinkedList<>(); newPostPattern.add(header.getNodeName()); if(postPattern!=null){newPostPattern.addAll(postPattern); } List<List<String>> newTransRecords =new LinkedList<List<String>>(); FP_TreeNode backNode=header.getNextNode(); while(backNode!=null){int counter =backNode.getCount();List<String> preNodes=new ArrayList<>();FP_TreeNode parent=backNode;while((parent=parent.getParent()).getNodeName()!=null){ preNodes.add(parent.getNodeName());}while(counter-->0){ newTransRecords.add(preNodes);}backNode=backNode.getNextNode(); } FPGrowth(newTransRecords, newPostPattern); } } /* * @transRecords:交易记录 * @ return: 频繁1项集 */ public ArrayList<FP_TreeNode> buildHeaderTable(List<List<String>> transRecords){ArrayList<FP_TreeNode> F1=null;if(transRecords.size()>0){ F1=new ArrayList<>(); Map<String, FP_TreeNode> map=new HashMap<>(); for(List<String> record:transRecords){for(String item:record){ if(map.keySet().contains(item)){map.get(item).countCreament(1); } 
else{FP_TreeNode node =new FP_TreeNode(item);node.setCount(1);map.put(item, node); }} } //支持度大于minSupport的放入F1中 Set<String> names=map.keySet(); for(String name:names){FP_TreeNode tmpNode=map.get(name);if(tmpNode.getCount()>=minSupport){ F1.add(tmpNode);} } Collections.sort(F1); return F1;}return null; } //构建FP-Tree public FP_TreeNode buildFPTree(List<List<String>> transRecords,ArrayList<FP_TreeNode> F1){FP_TreeNode root=new FP_TreeNode();//创建树的根节点for(List<String> transRecord:transRecords){ LinkedList<String> record=sortByF1(transRecord, F1);//根据F1频繁项集对每条记录排序 FP_TreeNode subTreeRoot=root; FP_TreeNode tmpRoot=null; if(root.getChildren()!=null){while(!record.isEmpty()&&(tmpRoot=subTreeRoot.findChild(record.peek()))!=null){ tmpRoot.countCreament(1); subTreeRoot=tmpRoot; record.poll(); } } addNodes(subTreeRoot,record,F1); }return root; } private void addNodes(FP_TreeNode subTreeRoot, LinkedList<String> record, ArrayList<FP_TreeNode> F1) {// TODO Auto-generated method stubif(record.size()>0){ while(record.size()>0){String item=record.poll();FP_TreeNode leafNode=new FP_TreeNode(item);leafNode.setCount(1);leafNode.setParent(subTreeRoot);subTreeRoot.addChild(leafNode);for(FP_TreeNode f1:F1){ if(f1.getNodeName().equals(item)){while(f1.getNextNode()!=null){ f1=f1.getNextNode();}f1.setNextNode(leafNode);break; }}addNodes(leafNode, record, F1); }} } //把交易记录按照项的频繁程度降序排列 public LinkedList<String> sortByF1(List<String> transRecord,ArrayList<FP_TreeNode> F1){ Map<String,Integer> map=new HashMap<>(); for(String item:transRecord){ for(int i=0;i<F1.size();i++){ FP_TreeNode tmpNode=F1.get(i); if (tmpNode.getNodeName().equals(item)) { map.put(item, i); } } } ArrayList<Entry<String, Integer>> al=new ArrayList<Entry<String, Integer>>(map.entrySet()); Collections.sort(al,new Comparator<Map.Entry<String, Integer>>(){ @Override public int compare(Entry<String, Integer> et,Entry<String, Integer> et1){ //降序排列 return et.getValue()- et1.getValue(); } }); LinkedList<String> res =new 
LinkedList<>(); for(Entry<String, Integer> entry:al){ res.add(entry.getKey()); } return res; } }
3.运行FPGrowth算法
package com.jiang.fpGrowth;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.LinkedList;import java.util.List;public class TestFPGrowth { public List<List<String>> readTransRecords(String file){List<List<String>> transRecord=new LinkedList<List<String>>();try { BufferedReader bf=new BufferedReader(new FileReader(file)); String line; List<String> record; while((line=bf.readLine())!=null){if(line.trim().length()>0){ // System.out.println(line); String[] str=line.split(" ");// for(int i=0;i<str.length;i++){//System.out.print(str[i]+" ");// } record=new LinkedList<String>(); for(String string:str){record.add(string); } transRecord.add(record);} } } catch (Exception e) { // TODO: handle exception e.printStackTrace();}return transRecord; } public static void main(String[] args) {FPGrowth fpGrowth=new FPGrowth();fpGrowth.setMinSupport(3);TestFPGrowth res=new TestFPGrowth();String file="F:\\FPGrowth\\record.txt";List<List<String>> transRecords=res.readTransRecords(file);/*for(int i=0;i<transRecords.size();i++){ for(int j=0;j<transRecords.get(i).size();j++){System.out.print(transRecords.get(i).get(j)+" "); } System.out.println();}*/ArrayList<FP_TreeNode> list=fpGrowth.buildHeaderTable(transRecords);/*for(int i=0;i<list.size();i++){ System.out.print(list.get(i).getNodeName()+" ");}*/fpGrowth.FPGrowth(transRecords, null); } }record.txt 文件如下
r z h j p
z y x w v u t s
z
r x n o s
y r x z q t p
y z x e q s t m
4.实验结果
1 0
- 机器学习实战—FPGrowth算法的实现
- 【机器学习系列2】FPGrowth算法与spark实现
- fpgrowth算法的学习笔记
- spark机器学习实现之fpgrowth
- FPGrowth算法实现
- 《机器学习实战》决策树(ID3算法)的分析与实现
- 机器学习实战笔记(三):决策树算法的Python实现
- 《机器学习实战》AdaBoost算法的分析与实现
- apriori算法的代码,python实现,参考《机器学习实战》
- 《机器学习实战》——K近邻算法实现
- 《机器学习实战》——k-近邻算法实现
- 机器学习实战——python实现knn算法
- Spark的模式挖掘—FPGrowth算法
- FPGrowth的java实现
- Apriori和FPgrowth算法学习
- 无监督学习-FPgrowth算法
- 《机器学习实战》学习笔记——K-近邻算法(KNN)(一)分类器的简单实现
- 机器学习实战——kNN算法
- C编译器编译结构体时的对齐原则
- 解决Animation 添加AnimationClip 无效的问题
- 搭建marathon和mesos单机环境
- 使用MyBatis Generator自动创建代码
- 缩放文本框ExpandTextView
- 机器学习实战—FPGrowth算法的实现
- arcgisserver发布服务详细流程
- Java并发编程:线程池创建及源码分析
- neutron 基本原理
- Linux shell下的颜色含义
- 简单实用ssm+json+ajax三级联动
- linux :安装中文输入法
- 多尺度竞争卷积
- js调用父框架函数与弹窗调用父页面函数的方法