获取最大重复子串

来源:互联网 发布:div加name属性 js 编辑:程序博客网 时间:2024/05/19 20:59

 static int length = 1; // Length of the longest repeated substring found so far (incremented by getMoreCharSameSet)
vector<vector<int> > gVvec; // Position sets for the longest repeated substrings; GetChild reads each stored position as the index of a substring's last character

/**
 * Groups the positions of every byte value that occurs at least twice in str.
 *
 * @param str  Input text, processed byte by byte.
 * @return One vector per repeated byte value, ordered by ascending byte value.
 *         Each vector lists, in ascending order, the indices at which that
 *         byte occurs. Bytes that occur only once are omitted, so an empty
 *         result means nothing repeats.
 */
std::vector<std::vector<int> > getOneCharSameSet(const std::string& str)
{
     // One bucket per possible byte value. Indexing through unsigned char
     // keeps the index non-negative where plain char is signed, and using 256
     // buckets (rather than a modulus) guarantees that two different byte
     // values never share a bucket.
     const unsigned int kBucketCount = 256;
     std::vector<std::vector<int> > buckets(kBucketCount);

     for (std::string::size_type i = 0; i < str.size(); ++i)
     {
         buckets[static_cast<unsigned char>(str[i])].push_back(static_cast<int>(i));
     }

     // Keep only the buckets that actually contain a repetition.
     std::vector<std::vector<int> > repeated;
     for (unsigned int b = 0; b < kBucketCount; ++b)
     {
         if (buckets[b].size() >= 2)
         {
              repeated.push_back(buckets[b]);
         }
     }

     return repeated;
}

/* 得到更长的字串重复长度*/
void getMoreCharSameSet(const string& str, const vector<vector<int> >& vvec)
{
     vector<vector<int> > resultVvec;

     for (unsigned int i=0; i < vvec.size(); ++i)//针对每一个重复集合
     {
         vector<vector<int> > setVec;
         for (int k=0; k < 131; ++k)
         {
              vector<int> tmp;
              setVec.push_back(tmp);
         }

         for (unsigned int j=0; j < vvec.size(); ++j)
         {
              if ( vvec[j]+1 < str.size() )
              {
                   setVec[ str[ (vvec[j]+1) ]% 131 ].push_back(vvec[j]+1);
              }
         }

         //选出有重复的集合
         vector<vector<int> > tmp;
         for (unsigned int k=0; k < setVec.size(); ++k)
         {
              if ( setVec[k].size() >= 2 )
              {
                   tmp.push_back(setVec[k]);
              }
         }

         for (unsigned int k=0; k < tmp.size(); ++k)
         {
              resultVvec.push_back(tmp[k]);
         }
     }

     //递归调用
     if ( resultVvec.size() > 0 )
     {
         ++length;
         gVvec = resultVvec;
         getMoreCharSameSet(str, resultVvec);
     }
}

void GetChild(string strsrc)
{
     string str = strsrc;
     cout << "原字符串为: " << strsrc << endl;
     vector<vector<int> > vvec = getOneCharSameSet(str);
     getMoreCharSameSet(str, vvec);

     cout << "重复最长的子串长度为: " << length << endl;
     cout << "重复最长的子串分别为: " << endl;

     for ( unsigned int i=0; i < gVvec.size(); ++i )
     {
         for ( unsigned int j=0; j < gVvec.size(); ++j)
         {
              cout << "起始位置: " << gVvec[j] - length + 1 << endl;
              for ( int k = gVvec[j] - length + 1; k <= gVvec[j]; ++k )
              {
                   cout << str[k];
              }
              cout << endl;
         }
     }

     return;
}

原创粉丝点击