FP-Growth 算法的 Java 实现
来源:互联网 发布:小旭音乐 知乎 编辑:程序博客网 时间:2024/06/05 18:16
package apriori;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
 * FP-Growth frequent-pattern miner.
 *
 * <p>Typical use: {@link #setMinSuport(int)}, then {@link #buildFPTree(List)},
 * then read the result from {@link #getFrequentItems()}.
 *
 * <p>The tree itself is made of {@code TreeNode} objects, a type declared
 * outside this file. This class relies on two properties of it that the
 * original code also relied on: the node created by the no-argument
 * constructor (the virtual root) has a {@code null} name, and a freshly
 * constructed node has no {@code next} link.
 */
public class FPTree {

    /** Minimum absolute support (occurrence count) a pattern needs to be reported. */
    private int minSuport;

    /** Every frequent pattern found by the last {@link #buildFPTree(List)} call, mapped to its support. */
    private Map<List<String>, Integer> frequentMap = new HashMap<List<String>, Integer>();

    /**
     * Returns the patterns mined by the most recent {@link #buildFPTree(List)} call.
     *
     * @return the live internal map from pattern (list of items) to support count
     */
    public Map<List<String>, Integer> getFrequentItems() {
        return frequentMap;
    }

    /**
     * Sets the minimum support threshold.
     *
     * @param minSuport minimum number of transactions a pattern must occur in
     */
    public void setMinSuport(int minSuport) {
        this.minSuport = minSuport;
    }

    /**
     * Counts how many times each item occurs across all records.
     *
     * @param records transactions (or conditional-pattern-base paths) to scan
     * @return map from item to occurrence count
     */
    private Map<String, Integer> getFrequency(List<List<String>> records) {
        Map<String, Integer> rect = new HashMap<String, Integer>();
        for (List<String> record : records) {
            for (String item : record) {
                Integer seen = rect.get(item);
                rect.put(item, seen == null ? 1 : seen + 1);
            }
        }
        return rect;
    }

    /**
     * Sorts every record in place: most frequent item first, ties broken by item name.
     *
     * <p>The tie-break matters. Without it two items of equal frequency may
     * appear in different relative order in different transactions, so their
     * co-occurrences are split between two differently ordered patterns and
     * each is under-counted.
     *
     * @param records records to sort in place
     * @param freq    frequency of every item that occurs in {@code records}
     */
    private void sortByFrequency(List<List<String>> records, final Map<String, Integer> freq) {
        Comparator<String> order = new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // compareTo instead of subtraction: no overflow, same descending order.
                int byCount = freq.get(o2).compareTo(freq.get(o1));
                return byCount != 0 ? byCount : o1.compareTo(o2);
            }
        };
        for (List<String> record : records) {
            Collections.sort(record, order);
        }
    }

    /**
     * Mines all frequent patterns of {@code records} and stores them so that
     * {@link #getFrequentItems()} returns them.
     *
     * <p>Results of any earlier call are discarded. The caller's lists are not
     * modified: each transaction is copied, with {@code null} items dropped and
     * repeated items collapsed, before being sorted. A {@code null} or empty
     * {@code records} yields no patterns.
     *
     * @param records transactions, one list of items per transaction; may be {@code null}
     */
    public void buildFPTree(List<List<String>> records) {
        frequentMap.clear();
        if (records == null || records.isEmpty()) {
            return;
        }
        List<List<String>> transactions = new ArrayList<List<String>>(records.size());
        for (List<String> record : records) {
            if (record == null) {
                continue;
            }
            // An item counts at most once per transaction.
            Set<String> unique = new HashSet<String>();
            for (String item : record) {
                if (item != null) {
                    unique.add(item);
                }
            }
            if (!unique.isEmpty()) {
                transactions.add(new ArrayList<String>(unique));
            }
        }
        Map<String, Integer> freq = getFrequency(transactions);
        sortByFrequency(transactions, freq);
        fpGrowth(transactions, freq, null);
    }

    /**
     * Recursive FP-Growth step: builds the tree for {@code cpb}, records one
     * pattern per frequent item, then recurses on each item's conditional
     * pattern base.
     *
     * @param cpb       conditional pattern base (the full transaction set at the top level),
     *                  already sorted by {@link #sortByFrequency(List, Map)}
     * @param freq      item frequencies within {@code cpb}
     * @param postModel suffix pattern accumulated so far, or {@code null} at the top level
     */
    private void fpGrowth(List<List<String>> cpb, Map<String, Integer> freq,
            LinkedList<String> postModel) {
        // One header node per item that is still frequent at this recursion level.
        Map<String, TreeNode> headers = new HashMap<String, TreeNode>();
        for (Entry<String, Integer> entry : freq.entrySet()) {
            int count = entry.getValue();
            if (count >= minSuport) {
                TreeNode node = new TreeNode(entry.getKey());
                node.setCount(count);
                headers.put(entry.getKey(), node);
            }
        }
        TreeNode treeRoot = buildSubTree(cpb, headers);
        // Only the virtual root is left: nothing more to mine on this branch.
        if ((treeRoot.getChildren() == null)
                || (treeRoot.getChildren().size() == 0)) {
            return;
        }
        for (TreeNode header : headers.values()) {
            LinkedList<String> newPostPattern = new LinkedList<String>();
            newPostPattern.add(header.getName());
            if (postModel != null) {
                newPostPattern.addAll(postModel);
            }
            frequentMap.put(new ArrayList<String>(newPostPattern), header.getCount());

            // Conditional pattern base: the prefix path of every tree node
            // carrying this item, repeated once per occurrence of that node.
            List<List<String>> newCPB = new LinkedList<List<String>>();
            for (TreeNode node = header.getNext(); node != null; node = node.getNext()) {
                LinkedList<String> path = new LinkedList<String>();
                // Walk up to the virtual root, whose name is null.
                for (TreeNode parent = node.getParent(); parent.getName() != null;
                        parent = parent.getParent()) {
                    path.push(parent.getName()); // prepend, so the path reads root -> node
                }
                if (path.isEmpty()) {
                    continue; // node hangs directly off the root: no prefix to contribute
                }
                for (int i = node.getCount(); i > 0; i--) {
                    newCPB.add(path);
                }
            }
            if (newCPB.isEmpty()) {
                continue;
            }
            Map<String, Integer> condFreq = getFrequency(newCPB);
            sortByFrequency(newCPB, condFreq);
            fpGrowth(newCPB, condFreq, newPostPattern);
        }
    }

    /**
     * Builds an FP-tree from {@code cpb} and threads each item's nodes onto
     * its header's {@code next} chain.
     *
     * @param cpb     sorted records to insert
     * @param headers header node per frequent item; items without a header are ignored
     * @return the virtual root of the new tree
     */
    private TreeNode buildSubTree(List<List<String>> cpb, Map<String, TreeNode> headers) {
        TreeNode root = new TreeNode();
        // Last node linked for each item, so appending to a header chain is O(1).
        Map<String, TreeNode> tails = new HashMap<String, TreeNode>();
        for (List<String> transRecord : cpb) {
            // Keep only frequent items so prefix matching never stalls on an infrequent one.
            LinkedList<String> record = new LinkedList<String>();
            for (String item : transRecord) {
                if (headers.containsKey(item)) {
                    record.add(item);
                }
            }
            TreeNode subTreeRoot = root;
            if (root.getChildren() != null) {
                // Follow the existing branch for as long as it matches, bumping counts.
                TreeNode match;
                while (!record.isEmpty()
                        && (match = subTreeRoot.findChild(record.peek())) != null) {
                    match.countIncrement(1);
                    subTreeRoot = match;
                    record.poll();
                }
            }
            // Grow new nodes for whatever did not match.
            addNodes(subTreeRoot, record, headers, tails);
        }
        return root;
    }

    /**
     * Appends the items of {@code record} as a new chain of nodes below {@code ancestor}.
     *
     * @param ancestor node to grow from
     * @param record   remaining items of the transaction, in tree order
     * @param headers  header node per frequent item; other items are skipped
     * @param tails    last node already linked for each item; updated by this method
     */
    private void addNodes(TreeNode ancestor, LinkedList<String> record,
            Map<String, TreeNode> headers, Map<String, TreeNode> tails) {
        for (String item : record) {
            TreeNode header = headers.get(item);
            if (header == null) {
                continue;
            }
            TreeNode leafnode = new TreeNode(item);
            leafnode.setCount(1);
            leafnode.setParent(ancestor);
            ancestor.addChild(leafnode);
            // Link after the current end of this item's chain (the header itself if none yet).
            TreeNode tail = tails.get(item);
            if (tail == null) {
                tail = header;
            }
            tail.setNext(leafnode);
            tails.put(item, leafnode);
            ancestor = leafnode;
        }
    }

    /**
     * Reads transactions from text files: one transaction per non-blank line,
     * items separated by {@code ","}.
     *
     * <p>Items are trimmed, empty items are dropped and repeated items within a
     * line are collapsed. Files are decoded with the platform default charset
     * (that is what {@link FileReader} does).
     *
     * @param filenames files to read; {@code null} or empty yields an empty list
     * @return all transactions from all files, never {@code null}
     * @throws IllegalStateException if a file cannot be read (the {@link IOException} is the cause)
     */
    public List<List<String>> readTransRocords(String[] filenames) {
        List<List<String>> transaction = new LinkedList<List<String>>();
        if (filenames == null) {
            return transaction;
        }
        for (String filename : filenames) {
            try {
                BufferedReader br = new BufferedReader(new FileReader(filename));
                try {
                    String line;
                    while ((line = br.readLine()) != null) {
                        if (line.trim().length() == 0) {
                            continue;
                        }
                        Set<String> record = new HashSet<String>();
                        for (String w : line.split(",")) {
                            String item = w.trim();
                            if (item.length() > 0) {
                                record.add(item);
                            }
                        }
                        if (!record.isEmpty()) {
                            transaction.add(new ArrayList<String>(record));
                        }
                    }
                } finally {
                    br.close();
                }
            } catch (IOException ex) {
                // Report to the caller instead of terminating the whole JVM.
                throw new IllegalStateException(
                        "Read transaction records failed. " + filename, ex);
            }
        }
        return transaction;
    }

    /**
     * Demo entry point: mines {@code pattens.cvs} with minimum support 100,
     * prints the elapsed milliseconds and the patterns, and writes the patterns
     * to {@code pattens.txt}.
     *
     * @param args ignored
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        FPTree fp = new FPTree();
        fp.setMinSuport(100);
        List<List<String>> records = fp
                .readTransRocords(new String[] { "pattens.cvs" });
        long beginTime = System.currentTimeMillis();
        fp.buildFPTree(records);
        long endTime = System.currentTimeMillis();
        System.out.println(endTime - beginTime);
        Map<List<String>, Integer> pattens = fp.getFrequentItems();
        String outfile = "pattens.txt";
        BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
        try {
            System.out.println("模式\t频数");
            bw.write("模式\t频数");
            bw.newLine();
            for (Entry<List<String>, Integer> entry : pattens.entrySet()) {
                System.out.println(entry.getKey() + "\t" + entry.getValue());
                bw.write(joinList(entry.getKey()) + "\t" + entry.getValue());
                bw.newLine();
            }
        } finally {
            // Close (and flush) even when a write fails.
            bw.close();
        }
    }

    /**
     * Joins the elements with {@code ","}.
     *
     * @param list elements to join; may be {@code null}
     * @return the joined string, or {@code ""} for a {@code null} or empty list
     */
    private static String joinList(List<String> list) {
        if (list == null || list.size() == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (String ele : list) {
            if (sb.length() > 0 || ele != list.get(0)) {
                // fall through to the separator logic below
            }
            sb.append(ele);
            sb.append(",");
        }
        // Drop the trailing comma.
        return sb.substring(0, sb.length() - 1);
    }
}
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
 * FP-Growth frequent-pattern miner.
 *
 * <p>Typical use: {@link #setMinSuport(int)}, then {@link #buildFPTree(List)},
 * then read the result from {@link #getFrequentItems()}.
 *
 * <p>The tree itself is made of {@code TreeNode} objects, a type declared
 * outside this file. This class relies on two properties of it that the
 * original code also relied on: the node created by the no-argument
 * constructor (the virtual root) has a {@code null} name, and a freshly
 * constructed node has no {@code next} link.
 */
public class FPTree {

    /** Minimum absolute support (occurrence count) a pattern needs to be reported. */
    private int minSuport;

    /** Every frequent pattern found by the last {@link #buildFPTree(List)} call, mapped to its support. */
    private Map<List<String>, Integer> frequentMap = new HashMap<List<String>, Integer>();

    /**
     * Returns the patterns mined by the most recent {@link #buildFPTree(List)} call.
     *
     * @return the live internal map from pattern (list of items) to support count
     */
    public Map<List<String>, Integer> getFrequentItems() {
        return frequentMap;
    }

    /**
     * Sets the minimum support threshold.
     *
     * @param minSuport minimum number of transactions a pattern must occur in
     */
    public void setMinSuport(int minSuport) {
        this.minSuport = minSuport;
    }

    /**
     * Counts how many times each item occurs across all records.
     *
     * @param records transactions (or conditional-pattern-base paths) to scan
     * @return map from item to occurrence count
     */
    private Map<String, Integer> getFrequency(List<List<String>> records) {
        Map<String, Integer> rect = new HashMap<String, Integer>();
        for (List<String> record : records) {
            for (String item : record) {
                Integer seen = rect.get(item);
                rect.put(item, seen == null ? 1 : seen + 1);
            }
        }
        return rect;
    }

    /**
     * Sorts every record in place: most frequent item first, ties broken by item name.
     *
     * <p>The tie-break matters. Without it two items of equal frequency may
     * appear in different relative order in different transactions, so their
     * co-occurrences are split between two differently ordered patterns and
     * each is under-counted.
     *
     * @param records records to sort in place
     * @param freq    frequency of every item that occurs in {@code records}
     */
    private void sortByFrequency(List<List<String>> records, final Map<String, Integer> freq) {
        Comparator<String> order = new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // compareTo instead of subtraction: no overflow, same descending order.
                int byCount = freq.get(o2).compareTo(freq.get(o1));
                return byCount != 0 ? byCount : o1.compareTo(o2);
            }
        };
        for (List<String> record : records) {
            Collections.sort(record, order);
        }
    }

    /**
     * Mines all frequent patterns of {@code records} and stores them so that
     * {@link #getFrequentItems()} returns them.
     *
     * <p>Results of any earlier call are discarded. The caller's lists are not
     * modified: each transaction is copied, with {@code null} items dropped and
     * repeated items collapsed, before being sorted. A {@code null} or empty
     * {@code records} yields no patterns.
     *
     * @param records transactions, one list of items per transaction; may be {@code null}
     */
    public void buildFPTree(List<List<String>> records) {
        frequentMap.clear();
        if (records == null || records.isEmpty()) {
            return;
        }
        List<List<String>> transactions = new ArrayList<List<String>>(records.size());
        for (List<String> record : records) {
            if (record == null) {
                continue;
            }
            // An item counts at most once per transaction.
            Set<String> unique = new HashSet<String>();
            for (String item : record) {
                if (item != null) {
                    unique.add(item);
                }
            }
            if (!unique.isEmpty()) {
                transactions.add(new ArrayList<String>(unique));
            }
        }
        Map<String, Integer> freq = getFrequency(transactions);
        sortByFrequency(transactions, freq);
        fpGrowth(transactions, freq, null);
    }

    /**
     * Recursive FP-Growth step: builds the tree for {@code cpb}, records one
     * pattern per frequent item, then recurses on each item's conditional
     * pattern base.
     *
     * @param cpb       conditional pattern base (the full transaction set at the top level),
     *                  already sorted by {@link #sortByFrequency(List, Map)}
     * @param freq      item frequencies within {@code cpb}
     * @param postModel suffix pattern accumulated so far, or {@code null} at the top level
     */
    private void fpGrowth(List<List<String>> cpb, Map<String, Integer> freq,
            LinkedList<String> postModel) {
        // One header node per item that is still frequent at this recursion level.
        Map<String, TreeNode> headers = new HashMap<String, TreeNode>();
        for (Entry<String, Integer> entry : freq.entrySet()) {
            int count = entry.getValue();
            if (count >= minSuport) {
                TreeNode node = new TreeNode(entry.getKey());
                node.setCount(count);
                headers.put(entry.getKey(), node);
            }
        }
        TreeNode treeRoot = buildSubTree(cpb, headers);
        // Only the virtual root is left: nothing more to mine on this branch.
        if ((treeRoot.getChildren() == null)
                || (treeRoot.getChildren().size() == 0)) {
            return;
        }
        for (TreeNode header : headers.values()) {
            LinkedList<String> newPostPattern = new LinkedList<String>();
            newPostPattern.add(header.getName());
            if (postModel != null) {
                newPostPattern.addAll(postModel);
            }
            frequentMap.put(new ArrayList<String>(newPostPattern), header.getCount());

            // Conditional pattern base: the prefix path of every tree node
            // carrying this item, repeated once per occurrence of that node.
            List<List<String>> newCPB = new LinkedList<List<String>>();
            for (TreeNode node = header.getNext(); node != null; node = node.getNext()) {
                LinkedList<String> path = new LinkedList<String>();
                // Walk up to the virtual root, whose name is null.
                for (TreeNode parent = node.getParent(); parent.getName() != null;
                        parent = parent.getParent()) {
                    path.push(parent.getName()); // prepend, so the path reads root -> node
                }
                if (path.isEmpty()) {
                    continue; // node hangs directly off the root: no prefix to contribute
                }
                for (int i = node.getCount(); i > 0; i--) {
                    newCPB.add(path);
                }
            }
            if (newCPB.isEmpty()) {
                continue;
            }
            Map<String, Integer> condFreq = getFrequency(newCPB);
            sortByFrequency(newCPB, condFreq);
            fpGrowth(newCPB, condFreq, newPostPattern);
        }
    }

    /**
     * Builds an FP-tree from {@code cpb} and threads each item's nodes onto
     * its header's {@code next} chain.
     *
     * @param cpb     sorted records to insert
     * @param headers header node per frequent item; items without a header are ignored
     * @return the virtual root of the new tree
     */
    private TreeNode buildSubTree(List<List<String>> cpb, Map<String, TreeNode> headers) {
        TreeNode root = new TreeNode();
        // Last node linked for each item, so appending to a header chain is O(1).
        Map<String, TreeNode> tails = new HashMap<String, TreeNode>();
        for (List<String> transRecord : cpb) {
            // Keep only frequent items so prefix matching never stalls on an infrequent one.
            LinkedList<String> record = new LinkedList<String>();
            for (String item : transRecord) {
                if (headers.containsKey(item)) {
                    record.add(item);
                }
            }
            TreeNode subTreeRoot = root;
            if (root.getChildren() != null) {
                // Follow the existing branch for as long as it matches, bumping counts.
                TreeNode match;
                while (!record.isEmpty()
                        && (match = subTreeRoot.findChild(record.peek())) != null) {
                    match.countIncrement(1);
                    subTreeRoot = match;
                    record.poll();
                }
            }
            // Grow new nodes for whatever did not match.
            addNodes(subTreeRoot, record, headers, tails);
        }
        return root;
    }

    /**
     * Appends the items of {@code record} as a new chain of nodes below {@code ancestor}.
     *
     * @param ancestor node to grow from
     * @param record   remaining items of the transaction, in tree order
     * @param headers  header node per frequent item; other items are skipped
     * @param tails    last node already linked for each item; updated by this method
     */
    private void addNodes(TreeNode ancestor, LinkedList<String> record,
            Map<String, TreeNode> headers, Map<String, TreeNode> tails) {
        for (String item : record) {
            TreeNode header = headers.get(item);
            if (header == null) {
                continue;
            }
            TreeNode leafnode = new TreeNode(item);
            leafnode.setCount(1);
            leafnode.setParent(ancestor);
            ancestor.addChild(leafnode);
            // Link after the current end of this item's chain (the header itself if none yet).
            TreeNode tail = tails.get(item);
            if (tail == null) {
                tail = header;
            }
            tail.setNext(leafnode);
            tails.put(item, leafnode);
            ancestor = leafnode;
        }
    }

    /**
     * Reads transactions from text files: one transaction per non-blank line,
     * items separated by {@code ","}.
     *
     * <p>Items are trimmed, empty items are dropped and repeated items within a
     * line are collapsed. Files are decoded with the platform default charset
     * (that is what {@link FileReader} does).
     *
     * @param filenames files to read; {@code null} or empty yields an empty list
     * @return all transactions from all files, never {@code null}
     * @throws IllegalStateException if a file cannot be read (the {@link IOException} is the cause)
     */
    public List<List<String>> readTransRocords(String[] filenames) {
        List<List<String>> transaction = new LinkedList<List<String>>();
        if (filenames == null) {
            return transaction;
        }
        for (String filename : filenames) {
            try {
                BufferedReader br = new BufferedReader(new FileReader(filename));
                try {
                    String line;
                    while ((line = br.readLine()) != null) {
                        if (line.trim().length() == 0) {
                            continue;
                        }
                        Set<String> record = new HashSet<String>();
                        for (String w : line.split(",")) {
                            String item = w.trim();
                            if (item.length() > 0) {
                                record.add(item);
                            }
                        }
                        if (!record.isEmpty()) {
                            transaction.add(new ArrayList<String>(record));
                        }
                    }
                } finally {
                    br.close();
                }
            } catch (IOException ex) {
                // Report to the caller instead of terminating the whole JVM.
                throw new IllegalStateException(
                        "Read transaction records failed. " + filename, ex);
            }
        }
        return transaction;
    }

    /**
     * Demo entry point: mines {@code pattens.cvs} with minimum support 100,
     * prints the elapsed milliseconds and the patterns, and writes the patterns
     * to {@code pattens.txt}.
     *
     * @param args ignored
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        FPTree fp = new FPTree();
        fp.setMinSuport(100);
        List<List<String>> records = fp
                .readTransRocords(new String[] { "pattens.cvs" });
        long beginTime = System.currentTimeMillis();
        fp.buildFPTree(records);
        long endTime = System.currentTimeMillis();
        System.out.println(endTime - beginTime);
        Map<List<String>, Integer> pattens = fp.getFrequentItems();
        String outfile = "pattens.txt";
        BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
        try {
            System.out.println("模式\t频数");
            bw.write("模式\t频数");
            bw.newLine();
            for (Entry<List<String>, Integer> entry : pattens.entrySet()) {
                System.out.println(entry.getKey() + "\t" + entry.getValue());
                bw.write(joinList(entry.getKey()) + "\t" + entry.getValue());
                bw.newLine();
            }
        } finally {
            // Close (and flush) even when a write fails.
            bw.close();
        }
    }

    /**
     * Joins the elements with {@code ","}.
     *
     * @param list elements to join; may be {@code null}
     * @return the joined string, or {@code ""} for a {@code null} or empty list
     */
    private static String joinList(List<String> list) {
        if (list == null || list.size() == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (String ele : list) {
            sb.append(ele);
            sb.append(",");
        }
        // Drop the trailing comma.
        return sb.substring(0, sb.length() - 1);
    }
}
0 0
- FPGrowth 实现
- FPgrowth实现
- FPGrowth算法实现
- FPGrowth的java实现
- FPGrowth
- FPgrowth
- 关联规则FpGrowth算法 Java实现
- spark机器学习实现之fpgrowth
- 数据挖掘笔记-关联规则-FPGrowth-简单实现
- 数据挖掘笔记-关联规则-FPGrowth-MapReduce实现
- 机器学习实战—FPGrowth算法的实现
- zz FPtree 和 FPgrowth 算法和 Java 实现
- 【机器学习系列2】FPGrowth算法与spark实现
- mahout FpGrowth
- FPGrowth算法
- Spark MLlib FPGrowth算法
- FPGrowth算法理论
- Spark MLlib FPGrowth算法
- http与https的理解
- Hadoop回顾--Hive常用函数
- 类型强转的那些坑
- 欢迎使用CSDN-markdown编辑器
- apriori算法实现
- FPgrowth实现
- 上传下载
- VM虚拟主机设置网络
- 单链表实现查找中间结点
- VC++ 防火墙 Win7 XP MFC
- 数据库--索引的类型及特点
- jQuery学习笔记(6)——自定义插件
- UVa272
- K-Means Python实现