机器学习实战—FPGrowth算法的实现

来源:互联网 发布:临沂软件开发 编辑:程序博客网 时间:2024/06/05 00:56

本博文主要讲解FPGrowth算法的Java实现,话不多说,直接上代码。

1.首先构建FP树所需要的数据结构。

package com.jiang.fpGrowth;import java.util.ArrayList;import java.util.List;import javax.swing.tree.TreeNode;/** *类说明 * @author jiangfeng * @since 2016年1月11日 */public class FP_TreeNode implements Comparable<FP_TreeNode> {    private String nodeName;//节点名称    private int count;//计数    private FP_TreeNode parent;//父节点    private List<FP_TreeNode> children;    private FP_TreeNode nextNode;//下一个同名节点        public FP_TreeNode(){}        public FP_TreeNode(String name){nodeName=name;    }    public String getNodeName() {        return nodeName;    }    public void setNodeName(String nodeName) {        this.nodeName = nodeName;    }    public int getCount() {        return count;    }    public void setCount(int count) {        this.count = count;    }    public FP_TreeNode getParent() {        return parent;    }    public void setParent(FP_TreeNode parent) {        this.parent = parent;    }    public List<FP_TreeNode> getChildren() {        return children;    }    public void setChildren(List<FP_TreeNode> children) {        this.children = children;    }    public FP_TreeNode getNextNode() {        return nextNode;    }    public void setNextNode(FP_TreeNode nextNode) {        this.nextNode = nextNode;    }    //添加孩子节点    public void addChild(FP_TreeNode child){if(this.getChildren()==null){//孩子节点为空,新建链表,加入孩子节点,再作为该节点的孩子结合    List<FP_TreeNode> list=new ArrayList<>();    list.add(child);    this.setChildren(list);}else{    this.getChildren().add(child);}    }        //查找孩子节点    public FP_TreeNode findChild(String name){List<FP_TreeNode> children=this.getChildren();if(children!=null){    for(FP_TreeNode child:children){if(child.getNodeName().equals(name)){    return child;}    }}return null;    }        //打印孩子节点的名称    public void printChildrenName(){List<FP_TreeNode> children=this.getChildren();if(children!=null){    for(FP_TreeNode child : children){System.out.print(child.getNodeName()+" ");    }}else {    System.out.println("null");}    }            public void 
countCreament(int n){this.count+=n;    }        //使得Arrays.sort()按照降序排列    @Override    public int compareTo(FP_TreeNode node){int count=node.getCount();return count-this.count;    }    }

2.实现FPGrowth算法

package com.jiang.fpGrowth;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.LinkedList;import java.util.List;import java.util.Map;import java.util.Map.Entry;import javax.swing.tree.TreeNode;import java.util.Set;public class FPGrowth {    private int minSupport;    public int getMinSupport() {        return minSupport;    }    public void setMinSupport(int minSupport) {        this.minSupport = minSupport;    }        //FPGrowth算法    public void FPGrowth(List<List<String>> transRecords,List<String> postPattern){ArrayList<FP_TreeNode> headerTable=buildHeaderTable(transRecords);FP_TreeNode treeRoot=buildFPTree(transRecords, headerTable);if(treeRoot.getChildren()==null||treeRoot.getChildren().size()==0){    return;}if(postPattern!=null){    for(FP_TreeNode header:headerTable){System.out.print(header.getCount()+"\t"+header.getNodeName());for(String ele:postPattern){    System.out.print("\t"+ele);}System.out.println();    }}for(FP_TreeNode header:headerTable){    List<String> newPostPattern =new LinkedList<>();        newPostPattern.add(header.getNodeName());    if(postPattern!=null){newPostPattern.addAll(postPattern);    }    List<List<String>> newTransRecords =new LinkedList<List<String>>();    FP_TreeNode backNode=header.getNextNode();    while(backNode!=null){int counter =backNode.getCount();List<String> preNodes=new ArrayList<>();FP_TreeNode parent=backNode;while((parent=parent.getParent()).getNodeName()!=null){    preNodes.add(parent.getNodeName());}while(counter-->0){    newTransRecords.add(preNodes);}backNode=backNode.getNextNode();    }    FPGrowth(newTransRecords, newPostPattern);    }    }        /*     * @transRecords:交易记录     * @ return: 频繁1项集     */    public ArrayList<FP_TreeNode> buildHeaderTable(List<List<String>> transRecords){ArrayList<FP_TreeNode> F1=null;if(transRecords.size()>0){    F1=new ArrayList<>();    Map<String, FP_TreeNode> map=new HashMap<>();        
for(List<String> record:transRecords){for(String item:record){    if(map.keySet().contains(item)){map.get(item).countCreament(1);    }    else{FP_TreeNode node =new FP_TreeNode(item);node.setCount(1);map.put(item, node);    }}    }    //支持度大于minSupport的放入F1中    Set<String> names=map.keySet();    for(String name:names){FP_TreeNode tmpNode=map.get(name);if(tmpNode.getCount()>=minSupport){    F1.add(tmpNode);}    }        Collections.sort(F1);        return F1;}return null;    }    //构建FP-Tree        public FP_TreeNode buildFPTree(List<List<String>> transRecords,ArrayList<FP_TreeNode> F1){FP_TreeNode root=new FP_TreeNode();//创建树的根节点for(List<String> transRecord:transRecords){        LinkedList<String>  record=sortByF1(transRecord, F1);//根据F1频繁项集对每条记录排序    FP_TreeNode subTreeRoot=root;    FP_TreeNode tmpRoot=null;        if(root.getChildren()!=null){while(!record.isEmpty()&&(tmpRoot=subTreeRoot.findChild(record.peek()))!=null){    tmpRoot.countCreament(1);    subTreeRoot=tmpRoot;    record.poll();    }    }    addNodes(subTreeRoot,record,F1);    }return root;    }            private void addNodes(FP_TreeNode subTreeRoot, LinkedList<String> record, ArrayList<FP_TreeNode> F1) {// TODO Auto-generated method stubif(record.size()>0){    while(record.size()>0){String item=record.poll();FP_TreeNode leafNode=new FP_TreeNode(item);leafNode.setCount(1);leafNode.setParent(subTreeRoot);subTreeRoot.addChild(leafNode);for(FP_TreeNode f1:F1){    if(f1.getNodeName().equals(item)){while(f1.getNextNode()!=null){    f1=f1.getNextNode();}f1.setNextNode(leafNode);break;    }}addNodes(leafNode, record, F1);    }}    }    //把交易记录按照项的频繁程度降序排列   public LinkedList<String> sortByF1(List<String> transRecord,ArrayList<FP_TreeNode> F1){       Map<String,Integer> map=new HashMap<>();       for(String item:transRecord){   for(int i=0;i<F1.size();i++){       FP_TreeNode tmpNode=F1.get(i);       if (tmpNode.getNodeName().equals(item)) {   map.put(item, i);       }   }       }       
ArrayList<Entry<String, Integer>> al=new ArrayList<Entry<String, Integer>>(map.entrySet());       Collections.sort(al,new Comparator<Map.Entry<String, Integer>>(){   @Override   public int compare(Entry<String, Integer> et,Entry<String, Integer> et1){       //降序排列      return et.getValue()- et1.getValue();   }       });       LinkedList<String> res =new LinkedList<>();       for(Entry<String, Integer> entry:al){   res.add(entry.getKey());       }              return res;   } }

3.运行FPGrowth算法

package com.jiang.fpGrowth;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.LinkedList;import java.util.List;public class TestFPGrowth {    public List<List<String>> readTransRecords(String file){List<List<String>> transRecord=new LinkedList<List<String>>();try {   BufferedReader bf=new BufferedReader(new FileReader(file));    String line;    List<String> record;    while((line=bf.readLine())!=null){if(line.trim().length()>0){   // System.out.println(line);    String[] str=line.split(" ");//    for(int i=0;i<str.length;i++){//System.out.print(str[i]+" ");//    }    record=new LinkedList<String>();    for(String string:str){record.add(string);    }    transRecord.add(record);}    }    } catch (Exception e) {    // TODO: handle exception    e.printStackTrace();}return transRecord;    }        public static void main(String[] args) {FPGrowth fpGrowth=new FPGrowth();fpGrowth.setMinSupport(3);TestFPGrowth res=new TestFPGrowth();String file="F:\\FPGrowth\\record.txt";List<List<String>> transRecords=res.readTransRecords(file);/*for(int i=0;i<transRecords.size();i++){    for(int j=0;j<transRecords.get(i).size();j++){System.out.print(transRecords.get(i).get(j)+" ");    }    System.out.println();}*/ArrayList<FP_TreeNode> list=fpGrowth.buildHeaderTable(transRecords);/*for(int i=0;i<list.size();i++){    System.out.print(list.get(i).getNodeName()+" ");}*/fpGrowth.FPGrowth(transRecords, null);    }       }
record.txt  文件如下

r z h j p
z y x w v u t s
z
r x n o s
y r x z q t p
y z x e q s t m

4.实验结果


1 0
原创粉丝点击