// FP.cpp : 定义控制台应用程序的入口点。//#include "stdafx.h"#include <string>#include <iostream>#include <vector>#include <map>#include <algorithm>#include <list>using namespace std;const int kMinSupport = 2;map<list<string>, int> FreqPattern;struct TreeNode { int count; string tag; map<string, TreeNode*> children; TreeNode* parent; TreeNode* Sibling; TreeNode(string tag, TreeNode* parent, int count = 1) : tag(tag), parent(parent), count(count) { Sibling = NULL; children.clear(); } bool IsRoot() { return (parent == NULL); } void dump(int k) { for(int i = 0; i < k; i++) { printf("--------"); } if (IsRoot()) { printf("\nRoot \n"); } else { printf("%s:%d \n", tag.c_str(), count); } map<string, TreeNode*>::iterator it; for(it = children.begin(); it != children.end(); it++) { printf("|"); it->second->dump(k+1); } }};void GenConditionalTree(map<vector<string>, int>& Task, map<string, TreeNode*>& head, TreeNode& root, map<string, int>& FreqTable, vector<string>& rank, bool prune);map<string, int>* gFreqTable = NULL;bool Compare(string a, string b) { int aw = (*gFreqTable)[a]; int bw = (*gFreqTable)[b]; return (aw > bw);}void GenPermutation(vector<string>& prefix_set, int start, list<string>& result, int freq, int suffix_len) { // last one; if (start == prefix_set.size() - 1) { result.push_back(prefix_set[start]); if (result.size() <= suffix_len) { return; } list<string>::iterator it; printf("{"); for (it = result.begin(); it != result.end(); it++) { printf("%s ", it->c_str()); } printf("}# %d \n", freq); result.pop_back(); if (result.size() > suffix_len ) { printf("{"); int i; for (i = 0, it = result.begin(); it != result.end(); i++, it++) { printf("%s", it->c_str()); if (i != result.size() - 1) { printf(","); } } printf("}# %d \n", freq); } return; } // choose start; result.push_back(prefix_set[start]); GenPermutation(prefix_set, start + 1, result, freq, suffix_len); result.pop_back(); GenPermutation(prefix_set, start + 1, result, freq, suffix_len);}void FpGrowth(TreeNode& main, string& tag, int tag_freq, map<string, TreeNode*>& head, list<string>& suffix) { // 产生条件基,作为子任务 map<vector<string>, int> sub_task; TreeNode* n = head[tag]; vector<string> prefix_path; do { TreeNode* leaf = n; prefix_path.clear(); int count = leaf->count; while ((leaf = leaf->parent) != NULL) { if (leaf->IsRoot()) { break; } prefix_path.push_back(leaf->tag); } if (prefix_path.size() > 0) { sub_task[prefix_path] = count; } } while ( (n = n->Sibling) != NULL); // 产生条件 FP 树 map<string, TreeNode*> sub_head; TreeNode sub_root("sub_root", NULL); map<string, int> sub_freq_table; vector<string> sub_rank; GenConditionalTree(sub_task, sub_head, sub_root, sub_freq_table, sub_rank, true); sub_root.dump(0); // 单路 TreeNode* tn = &sub_root; bool single_path = true; do { int sons = tn->children.size(); if (sons > 1) { single_path = false; break; } if (sons <= 0) { break; } tn = tn->children.begin()->second; } while (tn); if (single_path) { suffix.push_back(tag); // 产生自己的 pattern; list<string>::iterator it; printf("{"); for (it = suffix.begin(); it != suffix.end(); it++) { printf("%s ", it->c_str()); } printf("}# %d \n", tag_freq); // 产生其他组合的 pattern if (sub_rank.size() > 0) { GenPermutation(sub_rank, 0, suffix, sub_freq_table[sub_rank[0]], suffix.size()); } suffix.pop_back(); return; } // 具有多个路径 suffix.push_back(tag); for (int i = sub_rank.size() - 1; i >= 0; i--) { FpGrowth(sub_root, sub_rank[i], sub_freq_table[sub_rank[i]], sub_head, suffix); }}void GenConditionalTree(map<vector<string>, int>& Task, map<string, TreeNode*>& head, TreeNode& root, map<string, int>& FreqTable, vector<string>& rank, bool prune) { head.clear(); rank.clear(); FreqTable.clear(); // 统计各个项的频率 map<vector<string>, int>::iterator task_it; for (task_it = Task.begin(); task_it != Task.end(); task_it++) { vector<string> buy = task_it->first; for (int i = 0; i < buy.size(); i++) { FreqTable[buy[i]] += task_it->second; } } // 对各个项按照频次进行排序 printf("\n ============================================ \n"); map<string, int>::iterator freq_it; for (freq_it = FreqTable.begin(); freq_it != FreqTable.end(); freq_it++) { printf("[%s] => %d \n", freq_it->first.c_str(), freq_it->second); // 去掉非频繁集 if (prune && freq_it->second < kMinSupport) { continue; } rank.push_back(freq_it->first); } // 进行排序, 所有在 rank 中的项目,才参与建树 gFreqTable = &FreqTable; sort(rank.begin(), rank.end(), Compare); // 产生 FP 树 for (task_it = Task.begin(); task_it != Task.end(); task_it++) { vector<string> buy = task_it->first; // 重新安排task 的顺序 sort(buy.begin(), buy.end(), Compare); TreeNode* cur = &root; int count = task_it->second; for (int i = 0; i < buy.size(); i++) { string& tag = buy[i]; if (FreqTable[tag] < kMinSupport) { continue; } bool is_new = true; if (cur->children.find(tag) != cur->children.end()) { cur = cur->children[tag]; cur->count += count; is_new = false; } else { cur->children[tag] = new TreeNode(tag, cur, count); cur = cur->children[tag]; } if (is_new) { if (head.find(tag) != head.end()) { cur->Sibling = head[tag]; head[tag] = cur; } else { head[tag] = cur; } } } } return;}int _tmain(int argc, _TCHAR* argv[]){ map<vector<string>, int> Task; vector<string> buy; FreqPattern.clear(); buy.clear(); buy.push_back("I1"); buy.push_back("I2"); buy.push_back("I5"); Task[buy]++; buy.clear(); buy.push_back("I2"); buy.push_back("I4"); Task[buy]++; buy.clear(); buy.push_back("I2"); buy.push_back("I3"); Task[buy]++; buy.clear(); buy.push_back("I1"); buy.push_back("I2"); buy.push_back("I4"); Task[buy]++; buy.clear(); buy.push_back("I1"); buy.push_back("I3"); Task[buy]++; buy.clear(); buy.push_back("I2"); buy.push_back("I3"); Task[buy]++; buy.clear(); buy.push_back("I1"); buy.push_back("I3"); Task[buy]++; buy.clear(); buy.push_back("I1"); buy.push_back("I2"); buy.push_back("I3"); buy.push_back("I5"); Task[buy]++; buy.clear(); buy.push_back("I1"); buy.push_back("I2"); buy.push_back("I3"); Task[buy]++; map<string, int> FreqTable; map<string, TreeNode*> head; TreeNode root("root", NULL); vector<string> rank; GenConditionalTree(Task, head, root, FreqTable, rank, true); root.dump(0); // mining the frequency pattern; for (int i = rank.size() - 1; i >= 0; i--) { string& tag = rank[i]; list<string> suffix; suffix.clear(); FpGrowth(root, tag, FreqTable[tag], head, suffix); } getchar();return 0;}