PrefixSpan 源码

来源:互联网 发布:static nodes.json 编辑:程序博客网 时间:2024/06/08 15:16

 

 

// PrefixSpan.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include<iostream>
#include<iomanip>
#include<vector>
#include<map>
#include<set>
#include<string>
using namespace std;

 

// Data layout: an item is a string, an element (itemset) is a list of items,
// a sequence is a list of elements, and a database is a list of sequences.
using strVect = vector<string>;
using strDVect = vector<strVect>;
using strTVect = vector<strDVect>;
// item -> number of occurrences
using strIntMap = map<string,int>;
// set of item names
using strSet = set<string>;
// item -> projected database for that item
using vectMap = map<string,strTVect>;


// Print a sequence database to stdout, one sequence per line.
// Every element is written as the concatenation of its items, left-aligned
// in a column that is at least 5 characters wide.
void showData(strTVect datas)
{
 for(const strDVect &sequence : datas)
 {
  for(const strVect &element : sequence)
  {
   // glue the items of one element together, e.g. {"a","b"} -> "ab"
   string joined;
   for(const string &item : element)
   {
    joined.append(item);
   }
   cout << setiosflags(ios::left) << setw(5) << joined;
  }
  cout << endl;
 }
}

// Find the frequent length-1 items.
// For every item listed in `items`, count the number of sequences of `datas`
// that contain it (an item is counted at most once per sequence), then drop
// every item whose count is below `threshold`.
// Returns a map from item to its sequence count.
strIntMap getItems(strTVect datas,strSet items,int threshold)
{
 strIntMap support;

 for(const strDVect &sequence : datas)
 {
  // candidate items that have not been seen yet in this sequence
  strSet pending = items;
  for(const strVect &element : sequence)
  {
   for(const string &item : element)
   {
    // erase() reports whether the item was still pending; if so this is
    // its first occurrence in the sequence and it gets counted
    if(pending.erase(item) > 0)
    {
     ++support[item];
    }
   }
  }
 }

 // keep only the items that reach the threshold
 strIntMap::iterator entry = support.begin();
 while(entry != support.end())
 {
  if(entry->second >= threshold)
  {
   ++entry;
  }
  else
  {
   // erase() hands back the iterator following the removed entry
   entry = support.erase(entry);
  }
 }

 return support;
}

// Strip from `datas` every item that is not a key of `itemsMap`.
// Elements that become empty are removed from their sequence, and sequences
// that end up without any element are removed from the database.
// `datas` is modified in place.
void filtDataByItems(strTVect &datas, strIntMap itemsMap)
{
 strTVect keptSequences;

 for(const strDVect &sequence : datas)
 {
  strDVect keptElements;
  for(const strVect &element : sequence)
  {
   strVect keptItems;
   for(const string &item : element)
   {
    if(itemsMap.count(item) != 0)
    {
     keptItems.push_back(item);
    }
   }
   // an element with no surviving item disappears entirely
   if(!keptItems.empty())
   {
    keptElements.push_back(keptItems);
   }
  }
  // likewise for a sequence with no surviving element
  if(!keptElements.empty())
  {
   keptSequences.push_back(keptElements);
  }
 }

 datas.swap(keptSequences);
}

// Split the search space by sequence extension, i.e. build the projected
// databases for prefixes of the form <30 x>.
//
// For every sequence of `datas`, the first occurrence of each item yields one
// projected sequence: the remainder of the item's own element (marked with a
// leading "_" item) followed by all later elements. The projected sequence is
// recorded even when it is empty, so the number of projected sequences of an
// item equals the number of sequences that contain it.
//
// Elements that start with "_" are skipped completely: they are the tail of
// the element holding the prefix's last item, so they are handled by
// normalProjectData and not here.
//
// datas     - sequence database (or a projected database)
// threshold - minimum number of projected sequences an item must have
// returns   - map from item to its projected database, restricted to the
//             items that reach the threshold
vectMap simpleProjectData(strTVect datas,int threshold)
{
 strTVect::const_iterator strTIter;
 strDVect::const_iterator strDIter;
 strVect::const_iterator strIter;
 vectMap pVectMap;
 //items already projected for the current sequence
 strSet items;

 for(strTIter = datas.begin(); strTIter != datas.end(); ++strTIter)
 {
  items.clear();
  //element
  for(strDIter = strTIter->begin(); strDIter != strTIter->end(); ++strDIter)
  {
   //item
   for(strIter = strDIter->begin(); strIter != strDIter->end(); ++strIter)
   {
    //this element is the same as the last element of prefix
    //it doesn't work at the first call
    //it works when call by projectData function
    if(*strIter == "_")
    {
     break;
    }
    //only the first occurrence of an item in a sequence is projected
    if(items.find(*strIter) != items.end())
    {
     continue;
    }

    strDVect pDVect;
    //the rest of the current element, marked with "_"
    if((strIter+1) != strDIter->end())
    {
     strVect pVect;
     pVect.push_back("_");
     pVect.insert(pVect.end(), strIter+1, strDIter->end());
     pDVect.push_back(pVect);
    }
    //the rest of the sequence
    pDVect.insert(pDVect.end(), strDIter+1, strTIter->end());

    //add to project data (operator[] creates the entry on first use)
    pVectMap[*strIter].push_back(pDVect);
    items.insert(*strIter);
   }
  }
 }

 //filter project data by threshold
 //a non-positive threshold keeps everything; the explicit sign check avoids
 //the signed/unsigned comparison that would treat it as a huge value
 vectMap::iterator vMIter;
 for(vMIter = pVectMap.begin(); vMIter != pVectMap.end();)
 {
  if(threshold > 0 && vMIter->second.size() < static_cast<strTVect::size_type>(threshold))
  {
   vMIter = pVectMap.erase(vMIter);
  }
  else
  {
   ++vMIter;
  }
 }

 return pVectMap;
}

// Build the projected databases for itemset extensions of the prefix, i.e.
// for prefixes of the form <(30 x)> or <(_ x)>: the new item joins the last
// element of the prefix.
//
// Each element of each sequence is scanned for a position right after the
// last element of the prefix. Such a position is found either
//   - after a leading "_" item (the element is the tail of the element that
//     holds the prefix's last item), or
//   - after a run of consecutive items equal to the whole last prefix element.
// The item at that position is the extension candidate. Its projected
// sequence is the rest of its element (marked with a leading "_") followed by
// all later elements. Only the first candidate position of an element is
// examined, and each item is recorded at most once per sequence.
//
// datas     - projected database of `prefix`
// prefix    - current prefix; must not be empty (at() throws otherwise)
// threshold - minimum number of projected sequences an item must have
// returns   - map from extension item to its projected database, restricted
//             to the items that reach the threshold
vectMap normalProjectData(strTVect datas,strDVect prefix, int threshold)
{
 vectMap pVectMap;
 strTVect::const_iterator strTIter;
 strDVect::const_iterator strDIter;
 //strIter2:position reached inside the last element of prefix
 strVect::const_iterator strIter,strIter2;
 //items already projected for the current sequence
 strSet items;

 //get the last element of prefix
 const strVect preVect = prefix.at(prefix.size()-1);
 for(strTIter = datas.begin(); strTIter != datas.end(); ++strTIter)
 {
  items.clear();
  //element
  for(strDIter = strTIter->begin(); strDIter != strTIter->end(); ++strDIter)
  {
   //item
   for(strIter = strDIter->begin(); strIter != strDIter->end(); ++strIter)
   {
    if(*strIter == "_")
    {
     //"_" stands for the last element of prefix: treat it as fully matched
     strIter2 = preVect.end();
     ++strIter;
    }
    else
    {
     //advance over the items equal to the last element of prefix
     strIter2 = preVect.begin();
     while(strIter2 != preVect.end() && strIter != strDIter->end() && *strIter == *strIter2)
     {
      ++strIter;
      ++strIter2;
     }
    }

    //nothing left in this element: go on with the next element
    //(must be tested before strIter is dereferenced or incremented again)
    if(strIter == strDIter->end())
    {
     break;
    }
    //the last element of prefix did not match here: retry from the item
    //after the current position
    if(strIter2 != preVect.end())
    {
     continue;
    }

    //*strIter directly follows the last element of prefix
    //only the first occurrence of an item in a sequence is projected
    if(items.find(*strIter) == items.end())
    {
     strDVect pDVect;
     //the rest of the current element, marked with "_"
     if((strIter+1) != strDIter->end())
     {
      strVect pVect;
      pVect.push_back("_");
      pVect.insert(pVect.end(), strIter+1, strDIter->end());
      pDVect.push_back(pVect);
     }
     //the rest of the sequence
     pDVect.insert(pDVect.end(), strDIter+1, strTIter->end());

     //add to project data (operator[] creates the entry on first use)
     pVectMap[*strIter].push_back(pDVect);
     items.insert(*strIter);
    }
    //only one candidate position is examined per element
    break;
   }
  }
 }

 //filter project data by threshold
 //a non-positive threshold keeps everything; the explicit sign check avoids
 //the signed/unsigned comparison that would treat it as a huge value
 vectMap::iterator vMIter;
 for(vMIter = pVectMap.begin(); vMIter != pVectMap.end();)
 {
  if(threshold > 0 && vMIter->second.size() < static_cast<strTVect::size_type>(threshold))
  {
   vMIter = pVectMap.erase(vMIter);
  }
  else
  {
   ++vMIter;
  }
 }

 return pVectMap;
}

//the principal function. two functions recursion
void projectData(strTVect datas,strDVect prefix,int threshold)
{
 //find prefix like <30 x>
 vectMap simpleMap = simpleProjectData(datas,threshold);
 vectMap::iterator vMIter;
 strDVect::const_iterator strDIter;
 strVect::const_iterator strIter;
 strVect preVect;
 strDVect temp;
 for(vMIter = simpleMap.begin(); vMIter != simpleMap.end();vMIter++)
 {
  temp = prefix;
  preVect.clear();
  preVect.push_back(vMIter->first);
  temp.push_back(preVect);

  cout <<"<";
  for(strDIter = temp.begin(); strDIter != temp.end(); strDIter++)
  {
   if(strDIter->size() > 1)
   {
    cout << "(";
   }
   for(strIter = strDIter->begin(); strIter != strDIter->end(); strIter++)
   {
    cout <<*strIter<< " ";
   }
   if(strDIter->size() > 1)
   {
    cout << ") ";
   }
  }
  cout << "> :" << vMIter->second.size() << endl;
  showData(vMIter->second);
  projectData(vMIter->second,temp,threshold);
 }

    //find prefix like <(30 x)> or <(_ x)>
 vectMap normalMap = normalProjectData(datas,prefix,threshold);
 for(vMIter = normalMap.begin(); vMIter != normalMap.end(); vMIter++)
 {
  temp = prefix;
  temp.at(temp.size()-1).push_back(vMIter->first);
  strDVect::const_iterator strDIter;
  strVect::const_iterator strIter;
  cout << "<";
  for(strDIter = temp.begin(); strDIter != temp.end(); strDIter++)
  {
   if(strDIter->size() > 1)
   {
    cout << "(";
   }
   for(strIter = strDIter->begin(); strIter != strDIter->end(); strIter++)
   {
    cout <<*strIter<< " ";
   }
   if(strDIter->size() > 1)
   {
    cout << ") ";
   }
  }
  cout << "> :" << vMIter->second.size() << endl;
  showData(vMIter->second);
  projectData(vMIter->second,temp,threshold);
 }
}

 

// Append the four hard-coded sample sequences to `datas` and add the candidate
// item names "a".."f" to `items`.
//
// Sequences as built here:
//   <(abc)(abc)(ac)(d)(cf)>
//   <(ad)(c)(abcd)(ae)>
//   <(ef)(ab)(df)(c)(b)>
//   <(e)(g)(af)(c)(b)(c)>
// Item "g" occurs in the data but is not added to `items`.
// NOTE(review): the list this function originally carried in its comment read
// "a","abc","ac","d","cf","ad","c","bc","ae",... i.e. it starts with "a" and
// has "bc" where the code has "abcd" - confirm which data set is intended.
void initData(strTVect &datas,strSet &items)
{
 datas.push_back(strDVect{
  strVect{"a","b","c"},
  strVect{"a","b","c"},
  strVect{"a","c"},
  strVect{"d"},
  strVect{"c","f"}});

 datas.push_back(strDVect{
  strVect{"a","d"},
  strVect{"c"},
  strVect{"a","b","c","d"},
  strVect{"a","e"}});

 datas.push_back(strDVect{
  strVect{"e","f"},
  strVect{"a","b"},
  strVect{"d","f"},
  strVect{"c"},
  strVect{"b"}});

 datas.push_back(strDVect{
  strVect{"e"},
  strVect{"g"},
  strVect{"a","f"},
  strVect{"c"},
  strVect{"b"},
  strVect{"c"}});

 for(const char *name : {"a","b","c","d","e","f"})
 {
  items.insert(name);
 }
}

 

int _tmain(int argc, _TCHAR* argv[])
{
 strTVect datas;
 strSet items;
 initData(datas,items);
 showData(datas);
 int threshold = 2;
 strIntMap itemsMap = getItems(datas,items,threshold);
    filtDataByItems(datas,itemsMap);
 cout << "***********************************" << endl;
 showData(datas);
 vectMap pVectMap = simpleProjectData(datas,threshold);
 ////show
 cout << "***********************************" << endl;
 vectMap::iterator mapIter;
 for(mapIter = pVectMap.begin(); mapIter != pVectMap.end(); mapIter++)
 {
  cout << mapIter->first << " " << endl;
  showData(mapIter->second);
 }
 
 vectMap::iterator vMIter;
 strDVect prefix;
 strVect preVect;
 for(vMIter = pVectMap.begin(); vMIter != pVectMap.end();vMIter++)
 {
  cout << "***************************" << endl;
  cout << "<" << vMIter->first << "> :" << vMIter->second.size() << endl;
  //showData(vMIter->second);
  prefix.clear();
  preVect.clear();
  preVect.push_back(vMIter->first);
  prefix.push_back(preVect);
  projectData(vMIter->second,prefix,threshold);
 }


 return 0;
}

 

 来源:http://blog.sina.com.cn/s/blog_6e85bf420100o6cp.html

 

0 0
原创粉丝点击