FP tree 算法 C++ 实现

来源:互联网 发布:遍历二叉树非递归c语言 编辑:程序博客网 时间:2024/05/29 15:05
// FP.cpp : 定义控制台应用程序的入口点。//#include "stdafx.h"#include <string>#include <iostream>#include <vector>#include <map>#include <algorithm>#include <list>using namespace std;const int kMinSupport = 2;map<list<string>, int>  FreqPattern;struct TreeNode {  int count;  string tag;  map<string, TreeNode*> children;  TreeNode* parent;  TreeNode* Sibling;  TreeNode(string tag, TreeNode* parent, int count = 1) : tag(tag), parent(parent), count(count) {    Sibling = NULL;    children.clear();  }  bool IsRoot() {    return (parent == NULL);  }  void dump(int k) {    for(int i = 0; i < k; i++) {      printf("--------");    }    if (IsRoot()) {      printf("\nRoot \n");    } else {      printf("%s:%d \n", tag.c_str(), count);    }    map<string, TreeNode*>::iterator  it;    for(it = children.begin(); it != children.end(); it++) {      printf("|");      it->second->dump(k+1);    }  }};void GenConditionalTree(map<vector<string>, int>& Task,                         map<string, TreeNode*>& head,                        TreeNode& root,                        map<string, int>& FreqTable,                        vector<string>& rank,                        bool prune);map<string, int>* gFreqTable = NULL;bool Compare(string a, string b) {  int aw = (*gFreqTable)[a];  int bw = (*gFreqTable)[b];  return (aw > bw);}void GenPermutation(vector<string>& prefix_set, int start, list<string>& result, int freq, int suffix_len) {  // last one;  if (start == prefix_set.size() - 1) {    result.push_back(prefix_set[start]);    if (result.size() <= suffix_len) {      return;    }    list<string>::iterator it;        printf("{");    for (it = result.begin(); it != result.end(); it++) {      printf("%s ", it->c_str());    }    printf("}# %d \n", freq);    result.pop_back();    if (result.size() > suffix_len ) {      printf("{");      int i;      for (i = 0, it = result.begin(); it != result.end(); i++, it++) {        printf("%s", it->c_str());        if (i != result.size() - 1) {          printf(",");        }      }      printf("}# %d \n", freq);    }    return;  }  // choose start;  result.push_back(prefix_set[start]);  GenPermutation(prefix_set, start + 1, result, freq, suffix_len);  result.pop_back();  GenPermutation(prefix_set, start + 1, result, freq, suffix_len);}void FpGrowth(TreeNode& main, string& tag, int tag_freq, map<string, TreeNode*>& head, list<string>& suffix) {  // 产生条件基,作为子任务  map<vector<string>, int> sub_task;  TreeNode* n = head[tag];  vector<string> prefix_path;  do {    TreeNode* leaf = n;    prefix_path.clear();    int count = leaf->count;        while ((leaf = leaf->parent) != NULL) {      if (leaf->IsRoot()) {        break;      }      prefix_path.push_back(leaf->tag);    }    if (prefix_path.size() > 0) {      sub_task[prefix_path] = count;    }  } while ( (n = n->Sibling) != NULL);  // 产生条件 FP 树  map<string, TreeNode*> sub_head;  TreeNode sub_root("sub_root", NULL);  map<string, int> sub_freq_table;  vector<string> sub_rank;  GenConditionalTree(sub_task, sub_head, sub_root, sub_freq_table, sub_rank, true);  sub_root.dump(0);  // 单路  TreeNode* tn = &sub_root;  bool single_path = true;  do {    int sons = tn->children.size();    if (sons > 1) {      single_path = false;      break;    }     if (sons <= 0) {      break;    }    tn = tn->children.begin()->second;  } while (tn);      if (single_path) {    suffix.push_back(tag);    // 产生自己的 pattern;    list<string>::iterator  it;    printf("{");    for (it = suffix.begin(); it != suffix.end(); it++) {      printf("%s ", it->c_str());    }    printf("}# %d \n", tag_freq);    // 产生其他组合的 pattern    if (sub_rank.size() > 0) {      GenPermutation(sub_rank, 0, suffix, sub_freq_table[sub_rank[0]], suffix.size());    }    suffix.pop_back();    return;  }  // 具有多个路径  suffix.push_back(tag);  for (int i = sub_rank.size() - 1; i >= 0; i--) {    FpGrowth(sub_root, sub_rank[i], sub_freq_table[sub_rank[i]], sub_head, suffix);  }}void GenConditionalTree(map<vector<string>, int>& Task,                         map<string, TreeNode*>& head,                        TreeNode& root,                        map<string, int>& FreqTable,                        vector<string>& rank,                        bool prune) {  head.clear();  rank.clear();  FreqTable.clear();  // 统计各个项的频率  map<vector<string>, int>::iterator  task_it;  for (task_it = Task.begin(); task_it != Task.end(); task_it++) {    vector<string> buy = task_it->first;    for (int i = 0; i < buy.size(); i++) {      FreqTable[buy[i]] += task_it->second;    }  }  // 对各个项按照频次进行排序  printf("\n ============================================ \n");  map<string, int>::iterator freq_it;  for (freq_it = FreqTable.begin(); freq_it != FreqTable.end(); freq_it++) {    printf("[%s] => %d \n", freq_it->first.c_str(), freq_it->second);    // 去掉非频繁集    if (prune && freq_it->second < kMinSupport) {      continue;    }    rank.push_back(freq_it->first);  }  // 进行排序, 所有在 rank 中的项目,才参与建树  gFreqTable = &FreqTable;  sort(rank.begin(), rank.end(), Compare);  // 产生 FP 树  for (task_it = Task.begin(); task_it != Task.end(); task_it++) {    vector<string> buy = task_it->first;    // 重新安排task 的顺序    sort(buy.begin(), buy.end(), Compare);    TreeNode* cur = &root;    int count = task_it->second;    for (int i = 0; i < buy.size(); i++) {      string& tag = buy[i];      if (FreqTable[tag] < kMinSupport) {        continue;      }            bool is_new = true;      if (cur->children.find(tag) != cur->children.end()) {        cur = cur->children[tag];        cur->count += count;        is_new = false;      } else {        cur->children[tag] = new TreeNode(tag, cur, count);        cur = cur->children[tag];      }            if (is_new) {        if (head.find(tag) != head.end()) {          cur->Sibling = head[tag];          head[tag] = cur;        } else {          head[tag] = cur;        }      }    }  }  return;}int _tmain(int argc, _TCHAR* argv[]){  map<vector<string>, int> Task;  vector<string> buy;  FreqPattern.clear();  buy.clear();  buy.push_back("I1");  buy.push_back("I2");  buy.push_back("I5");  Task[buy]++;  buy.clear();  buy.push_back("I2");  buy.push_back("I4");  Task[buy]++;  buy.clear();  buy.push_back("I2");  buy.push_back("I3");  Task[buy]++;  buy.clear();  buy.push_back("I1");  buy.push_back("I2");  buy.push_back("I4");  Task[buy]++;  buy.clear();  buy.push_back("I1");  buy.push_back("I3");  Task[buy]++;  buy.clear();  buy.push_back("I2");  buy.push_back("I3");  Task[buy]++;  buy.clear();  buy.push_back("I1");  buy.push_back("I3");  Task[buy]++;  buy.clear();  buy.push_back("I1");  buy.push_back("I2");  buy.push_back("I3");  buy.push_back("I5");  Task[buy]++;  buy.clear();  buy.push_back("I1");  buy.push_back("I2");  buy.push_back("I3");  Task[buy]++;  map<string, int> FreqTable;  map<string, TreeNode*> head;  TreeNode root("root", NULL);  vector<string> rank;  GenConditionalTree(Task, head, root, FreqTable, rank, true);  root.dump(0);  // mining the frequency pattern;  for (int i = rank.size() - 1; i >= 0; i--) {    string& tag = rank[i];    list<string> suffix;    suffix.clear();    FpGrowth(root, tag, FreqTable[tag], head, suffix);  }  getchar();return 0;}

原创粉丝点击