FPTree

来源:互联网 发布:linux下查看java home 编辑:程序博客网 时间:2024/05/21 22:53


FPTree的资料网上很多,就不写了。

我范的错误:1、忽略了深拷贝,有多条记录时,这多条记录是得重新clone的,看来基础还得加强;

                        2、headerTable中必须是全序的;




package com.tur4;import java.util.LinkedList;import java.util.List;import com.tur4.algorithm.FPTree;/** *  * @author cstur4 * email cstur4@gmail.com * */public class test {public static void main(String[] args) {FPTree tree = new FPTree();List<LinkedList<String>> records = tree.readTransactionFile("in.txt", ",");tree.FPGrowth(records, null, 1);tree.showFrequenceSet();}}


package com.tur4.algorithm;import java.io.BufferedReader;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.LinkedList;import java.util.List;import java.util.Map;import java.util.Map.Entry;import org.apache.log4j.Logger;import com.tur4.pojo.TreeNode;/** *  * @author cstur4 * email cstur4@gmail.com * */public class FPTree {private static Logger LOG = Logger.getLogger(FPTree.class);private List<LinkedList<String>> transactions = new LinkedList<LinkedList<String>>();private List<String> frequenceSet = new LinkedList<String>();public List<LinkedList<String>> readTransactionFile(String fileName, String separator){BufferedReader br = null;try {br = new BufferedReader(new FileReader(fileName));String str = null;while((str = br.readLine()) != null){LinkedList<String> strs = new LinkedList<String>();String[] ss = str.split(separator);for(String s: ss)strs.add(s);transactions.add(strs);}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}finally{if(br != null)try {br.close();} catch (IOException e) {e.printStackTrace();}LOG.debug("transactions=" + transactions);}return transactions;}public List<TreeNode> buildHeaderTable(List<LinkedList<String>> records, int minSupport ){//LOG.debug("before header table(src)=" + records);Map<String, Long> map = new HashMap<String, Long>();for(LinkedList<String> record:records){for(String s:record){if(map.containsKey(s))map.put(s, map.get(s)+1);elsemap.put(s, 1L);}}List<Entry<String, Long>> table = new LinkedList<Entry<String, Long>>(map.entrySet());Collections.sort(table, new Comparator<Entry<String, Long>>() {@Overridepublic int compare(Entry<String, Long> o1,Entry<String, Long> o2) {return o1.getValue()>o2.getValue()?-1:1;}});List<TreeNode> res = new LinkedList<TreeNode>();for(Entry<String, Long> entry: table){if(entry.getValue() < minSupport)break;TreeNode node = new TreeNode();node.setName(entry.getKey());node.setCount(entry.getValue());res.add(node);}//LOG.debug("after header table=" + res);return res;}private void sortByFrequence(List<LinkedList<String>> records, List<TreeNode> headerTable){final Map<String, Integer> map = new HashMap<String, Integer>();for(int i=0;i<headerTable.size();++i)map.put(headerTable.get(i).getName(), i);/*(can't just compare the number occurs because of the same numbers, we need total order(全序) instead of partial order(偏序) */for(LinkedList<String> record:records){Collections.sort(record, new Comparator<String>(){@Overridepublic int compare(String o1, String o2) {if(!map.containsKey(o1))return 1;if(!map.containsKey(o2))return -1;return map.get(o1)>map.get(o2)?1:-1;}});}LOG.debug("sorted List=" + records);}private void insertNodes(TreeNode root, LinkedList<String> records, List<TreeNode> headerTable){TreeNode subRoot = root;while(records.size()!=0){TreeNode node = new TreeNode();node.setName(records.pop());node.increase();TreeNode lastNode = getLastHomonyNode(node.getName(), headerTable);if(lastNode == null){records.poll();continue;}lastNode.setNextHomony(node);node.setParent(subRoot);subRoot.addChild(node);subRoot = node;}}private TreeNode getLastHomonyNode(String name, List<TreeNode> headerTable){TreeNode node = null;for(TreeNode treeNode: headerTable)if(treeNode.getName()!=null && treeNode.getName().equals(name)){node = treeNode;break;}if(node == null)return null;while(node.getNextHomony()!=null && node.getNextHomony().getName()!=null)node = node.getNextHomony();return node;}private void traceTree(TreeNode root, int blank){for(int i=0;i<blank;++i)System.out.print("  ");System.out.println(root.getName()+"="+root.getCount());if(root.getChildren()!=null)for(TreeNode node:root.getChildren()){traceTree(node, blank+1);}}public TreeNode buildFPTree(List<LinkedList<String>> records, List<TreeNode> headerTable){sortByFrequence(records, headerTable);TreeNode root = new TreeNode();TreeNode subRoot = null;TreeNode tmpNode;for(LinkedList<String> record:records){//LOG.debug("records for build tree=" + records);subRoot = root;while(record.size()>0 && getLastHomonyNode(record.peek(), headerTable)!=null //not frequency item&& (tmpNode = subRoot.findChild(record.peek())) != null){tmpNode.increase();subRoot = tmpNode;record.poll();}insertNodes(subRoot, record, headerTable);}traceTree(root, 0);return root;}private void combination(List<TreeNode> nodes, int i, String itemset, List<String> posfix){if(i == nodes.size()){StringBuilder sb = new StringBuilder();sb.append(itemset);for(String s:posfix)       sb.append(s).append("/");        if(sb.toString().length()>2){frequenceSet.add(sb.toString());LOG.debug(sb.toString() + " added");}return;}TreeNode node = nodes.get(i);combination(nodes, i+1, itemset+node.getName()+"/", posfix);combination(nodes, i+1, itemset, posfix);      }public void FPGrowth(List<LinkedList<String>> records, List<String> pattern, int minSupport){List<TreeNode> headerTable = buildHeaderTable(records, minSupport);LOG.debug("pattern="+pattern+"\theaderTable:"+headerTable);TreeNode root = buildFPTree(records, headerTable);if(records.size() == 1){//单路径combination(headerTable, 0, "", pattern);return;}if(root.getChildren()==null || root.getChildren().size()==0)return;for(int i=headerTable.size()-1;i>=0;i--){TreeNode header = headerTable.get(i);TreeNode headerNode = header;List<LinkedList<String>> CPB = new LinkedList<LinkedList<String>>();while((headerNode = headerNode.getNextHomony()) != null){TreeNode backNode = headerNode;LinkedList<String> preNodes = new LinkedList<String>();while((backNode = backNode.getParent()).getName() != null){preNodes.add(backNode.getName());}long count = headerNode.getCount();if(preNodes.size()!=0)while(count-- > 0 )CPB.add((LinkedList<String>) preNodes.clone());}LinkedList<String> postPattern = new LinkedList<String>();postPattern.add(header.getName());if(pattern != null)postPattern.addAll(pattern);FPGrowth(CPB, postPattern, minSupport);}}public void showFrequenceSet() {for(String s:frequenceSet)System.out.println(s);}}



package com.tur4.pojo;import java.util.LinkedList;import java.util.List;import javax.management.RuntimeErrorException;/** *  * @author cstur4 * email cstur4@gmail.com * */public class TreeNode implements Comparable<TreeNode>{private String name;private Long count = 0L;private TreeNode nextHomony;private List<TreeNode> children;private TreeNode parent;public TreeNode findChild( String name){if(children !=  null)for(int i=0;i<children.size();++i)if(children.get(i).getName()!=null && children.get(i).getName().equals(name))return children.get(i);return null;}public String getName() {return name;}public void setName(String name) {this.name = name;}public Long getCount() {return count;}public void setCount(Long count) {this.count = count;}public TreeNode getNextHomony() {return nextHomony;}public void setNextHomony(TreeNode nextHomony) {this.nextHomony = nextHomony;}public List<TreeNode> getChildren() {return children;}public void setChildren(List<TreeNode> children) {this.children = children;}public TreeNode getParent() {return parent;}public void setParent(TreeNode parent) {this.parent = parent;}public void increase(Long increaseNum){this.count += increaseNum;}public void increase(){this.count ++;}public void addChild(TreeNode node){if(children==null)children = new LinkedList<TreeNode>();children.add(node);}@Overridepublic String toString() {return name + "=" + count;}@Overridepublic boolean equals(Object obj) {TreeNode node = (TreeNode)obj;return node.name.equals(name);}@Overridepublic int compareTo(TreeNode o) {return  this.count-o.count>0?-1:1;}}