动态聚类中 C-均值算法 (K-均值算法)的C++实现
来源:互联网 发布:淘宝优惠券采集api 编辑:程序博客网 时间:2024/04/30 12:38
一:说明
动态聚类方法是模式识别中一种普遍采用的方法,它具有以下3个要点:
1:选定某种距离度量作为样本间的相似性度量
2:确定某个评价聚类结果质量的准则函数
3:给定某个初始分类,然后用迭代算法找出使准则函数取极值的最好的聚类结果
本文给出了 C-均值算法 的 C++ 实现。
(算法描述参见 边肇祺、张学工等《模式识别》P237,清华大学出版社)
二:源码
2.1 头文件
#pragma once
#include <list>
#include <vector>
using namespace std;
// Number of samples in the data set. Also used as the capacity of the
// per-class arrays declared in CCMean (mean, jc, pcla).
#define DATANUM 19
// Large sentinel value; presumably chosen to exceed any squared distance
// between two samples of the data set (not enforced anywhere).
#define MAXDIST 333333
// One two-dimensional sample with integer coordinates.
struct CData
{
    int x1; // first coordinate
    int x2; // second coordinate
};
// C-means (K-means) clustering of a fixed array of DATANUM two-dimensional
// integer samples.
//
// The object owns one heap-allocated list per class slot (created in the
// constructor, destroyed in the destructor), so it must not be copied: a
// member-wise copy would make two objects delete the same lists.
class CCMean
{
public:
    // pdata: array of at least DATANUM samples. Only the pointer is stored;
    // the array must stay alive for as long as this object is used.
    CCMean(CData *pdata);
    ~CCMean(void);

    // Empties every class list, zeroes every class mean and resets je.
    void init();

    // Runs the clustering with InitClassNum classes and prints the result.
    void work(int InitClassNum);

private:
    // Copying is disabled (declared, intentionally never defined) because
    // the class owns raw pointers in pcla[].
    CCMean(const CCMean&);
    CCMean& operator=(const CCMean&);

    // calculate the mean of class i:
    void CalcuMean( int i );

    // calculate the ERROR of class i:
    void CalcuJc(int i);

    // calculate the total ERROR over all classes (je):
    void CalcuJe();

    // step 1 of C-Mean algorithm
    void InitDeploy();

    // step 4 and 5 of C-Mean algorithm
    // da is now in class i,
    // return true when da was moved from class i to class k
    //        (k then holds the destination class)
    // return false when we do not move da
    bool MoveItoK( const CData& da, int i, int &k );

    // calculate the (squared) distance between two data points:
    int dist( const CData& mean, const CData& da);

    // print result:
    void OutPut();

    // iClassNum is the initial class number, in text book, iClassNum <==> C
    int iClassNum;

    // pointer to data array (not owned):
    CData *pData;

    // store the mean of all classes. just uses 0 to iClassNum - 1;
    // in text book is: m1, m2, m3, m4, ... , mc.
    CData mean[DATANUM];

    // store the ERROR of each class, just uses 0 to iClassNum - 1;
    // the sum of jc[0] to jc[iClassNum - 1] is je, declared below;
    int jc[DATANUM];

    // the sum of jc[0] to jc[iClassNum - 1]
    int je;

    // pcla[i] points to the list holding the members of class i (owned)
    list< CData >* pcla[DATANUM];
};
2.2 实现文件
#include "assert.h"
#include <stdio.h>
#include "cmean.h"
// Stores the caller's sample array (borrowed, not copied), puts every
// statistic into a defined state and allocates one empty list per class slot.
CCMean::CCMean(CData *pdata)
    : iClassNum(0), pData(pdata), je(0)
{
    // Give every member a defined value so nothing is ever read
    // uninitialised, and null the slots so cleanup below is always safe.
    for (int i = 0; i < DATANUM; ++i)
    {
        mean[i].x1 = 0;
        mean[i].x2 = 0;
        jc[i] = 0;
        pcla[i] = 0;
    }

    try
    {
        for (int i = 0; i < DATANUM; ++i)
        {
            pcla[i] = new list< CData >;
            assert( pcla[i] != 0 );
        }
    }
    catch (...)
    {
        // The destructor does not run for a partially constructed object,
        // so release whatever was allocated before the failure.
        for (int i = 0; i < DATANUM; ++i)
            delete pcla[i];
        throw;
    }
}
// Releases the per-class lists that the constructor allocated.
CCMean::~CCMean()
{
    list< CData >** slot = pcla;
    list< CData >** const pastLast = pcla + DATANUM;
    while (slot != pastLast)
    {
        delete *slot;
        ++slot;
    }
}
void CCMean::init()
{
for(int i = 0; i < DATANUM; i ++ )
{
pcla[i]->clear();
mean[i].x1 = 0;
mean[i].x2 = 0;
}
je = 0;
}
void CCMean::CalcuMean(int ii)
{
int sum1 = 0, sum2 = 0;
int si = (int)pcla[ii]->size();
list< CData >::iterator iter = pcla[ii]->begin();
for(int i = 0; i < si; i ++ )
{
sum1 += iter->x1;
sum2 += iter->x2;
iter++;
}
mean[ii].x1 = sum1 / si;
mean[ii].x2 = sum2 / si;
}
// Recomputes the error jc[i] of every active class (0 .. iClassNum - 1) and
// stores their sum in je.
void CCMean::CalcuJe()
{
    // Start from zero: je is defined as the sum of the jc values, so adding
    // onto a previous total would double-count on any repeated call.
    je = 0;
    for (int i = 0; i < iClassNum; ++i)
    {
        CalcuJc( i );
        je += jc[i];
    }
}
void CCMean::CalcuJc( int index )
{
list< CData >::iterator iter = pcla[index]->begin();
int si = (int)pcla[index]->size();
jc[index] = 0;
for( int i = 0; i < si; i ++)
{
jc[index] += dist( mean[index], *iter );
iter ++;
}
}
// Squared Euclidean distance between the two points (no square root taken).
int CCMean::dist(const CData& mean, const CData& da)
{
    const int d1 = mean.x1 - da.x1;
    const int d2 = mean.x2 - da.x2;
    return d1 * d1 + d2 * d2;
}
// Step 1 of the C-means algorithm: build the initial partition.
//
// The first iClassNum samples become the initial class centres (one per
// class); every remaining sample joins the class whose initial centre is
// nearest (ties go to the lowest class index). Afterwards the class means
// and the error values jc[] / je are computed for that partition.
void CCMean::InitDeploy()
{
    // Outside 1..DATANUM there is either no centre to compare against or
    // the loops below would run past the ends of pData, mean and pcla.
    if (iClassNum < 1 || iClassNum > DATANUM)
        return;

    // choose the first iClassNum data as our initial class-centers:
    for (int i = 0; i < iClassNum; ++i)
    {
        mean[i] = pData[i];
        pcla[i]->push_back( pData[i] );
    }

    // put other data to our initial classes:
    for (int i = iClassNum; i < DATANUM; ++i)
    {
        // Seed the search with the real distance to class 0 rather than a
        // fixed "large" constant, so arbitrarily distant samples are still
        // assigned to their true nearest centre.
        int pos = 0;
        int mindis = dist( pData[i], mean[0] );
        for (int j = 1; j < iClassNum; ++j)
        {
            const int curdis = dist( pData[i], mean[j] );
            if (curdis < mindis)
            {
                mindis = curdis;
                pos = j;
            }
        }
        // add pData[i] to class (pos):
        pcla[pos]->push_back( pData[i] );
    }

    for (int i = 0; i < iClassNum; ++i)
        CalcuMean( i );
    CalcuJe();
}
bool CCMean::MoveItoK( const CData &da, int i , int& k )
{
// now da is in class i,if da is moved to another class, return true, else return false
int Pk = MAXDIST;
int Pj = 0;
int temk = 0;
for( int j = 0; j < iClassNum; j ++ )
{
int si = (int)pcla[j]->size();
if( j == i )
Pj = dist( mean[j], da ) * si/(si - 1);
else
Pj = dist( mean[j], da ) * si/(si + 1);
if( Pj < Pk )
{
Pk = Pj;
temk = j;
}
else if ( Pj == Pk && j == i )
{
// when Pj == Pk && j == i, we do not move (da) from class i to class j
temk = i;
}
}
if( i == temk )
return false; // we do NOT move da;
k = temk;
// add da to class k:
pcla[k]->push_back( da );
// delete da from class i, first find the positon of da in class i:
list< CData >::iterator iter = pcla[i]->begin();
while( iter != pcla[i]->end() )
{
if( iter->x1 == da.x1 && iter->x2 == da.x2 )
break;
iter++;
}
// now delete da from class i:
pcla[i]->erase( iter );
// we have move da from class i to class k;
return true;
}
// Prints every active class (0 .. iClassNum - 1) to standard output: a
// "class N:" header followed by its members as "(x1, x2)", five per line.
void CCMean::OutPut()
{
    for (int i = 0; i < iClassNum; ++i)
    {
        // "\n" is the newline escape; the previous "/n" printed the two
        // characters '/' and 'n' literally.
        printf("class %d:\n", i );

        int printed = 0;
        for (list< CData >::iterator iter = pcla[i]->begin();
             iter != pcla[i]->end(); ++iter)
        {
            printf( "(%d, %d) ", iter->x1, iter->x2 );
            // break the line after every fifth member
            if (++printed % 5 == 0)
                printf("\n");
        }
        printf("\n");
    }
}
void CCMean::work(int InitClassNum)
{
iClassNum = InitClassNum;
// step 1 of C-Mean algorithm
InitDeploy();
int counter = 0;
Again:
//OutPut();
// step 2 of C-Mean algorithm: choose one sample y (here is da) from collection
for( int i = 0; i < iClassNum ; i ++ )
{
// step 3 of C-Mean algorithm:
int si = (int)pcla[i]->size();
if( si == 1 )
continue;
// step 4 of C-Mean algorithm:
list< CData >::iterator iter = pcla[i]->begin();
for(int j = 0; j < (int)pcla[i]->size(); j++)
{
int k = 0;
CData da = *iter;
iter ++;
// step 5 of C-Mean algorithm:
if( MoveItoK( da , i, k ) == true )
{
// step 6 of C-Mean algorithm:
int OldJe = je;
je -= jc[i];
je -= jc[k];
CalcuMean( i );
CalcuMean( k );
CalcuJc( i );
CalcuJc( k );
je += jc[i];
je += jc[k];
if( OldJe > je )
{
counter = 0;
goto Again;
}
}
counter++;
// step 7 of C-Mean algorithm:
if( counter == DATANUM )
goto end;
}
}
end:
printf(" current Je is: %d/n", je );
OutPut();
}
2.3 测试文件
#include "CMean.h"
#include "process.h"
// Sample set passed to CCMean in main(): DATANUM two-dimensional points.
CData yy[DATANUM] =
{
    {0,0}, {0,1}, {1,0}, {1,1}, {1,2}, {2,1}, {2,2}, {2,3},
    {6,6}, {6,7}, {6,8}, {7,6}, {7,7}, {7,8}, {7,9}, {8,7},
    {8,8}, {8,9}, {9,8}
};
int main(int argc, char* argv[])
{
CCMean cmean( yy );
cmean.work(2);
system("pause");
return 0;
}
- 动态聚类中 C-均值算法 (K-均值算法)的C++实现
- K均值算法的c语言实现
- C/C++语言实现K均值(C均值)聚类算法
- k-均值聚类算法c语言版
- k-均值聚类算法c语言版
- k-均值聚类算法c语言版
- k-均值聚类算法c语言版
- k-均值算法的java实现
- 聚类分析的K均值算法(Python实现)
- K均值算法matlab实现
- k-均值算法及其实现
- K均值 && 模糊c均值
- K均值 && 模糊c均值
- K均值的算法步骤
- 模糊C均值聚类算法的实现
- k-means(k均值聚类)算法介绍及实现(c++)
- k-means(k均值聚类)算法介绍及实现(c++)
- k-均值聚类算法C语言源码
- 数据库设计中的14个技巧
- 一个女生是怎么样在QQ上被一个男生气死的
- 下载rtsp,mms地址的好软件
- 美女接电话是怎么样被气死的
- 鸟笼山剿匪记
- 动态聚类中 C-均值算法 (K-均值算法)的C++实现
- Effective STL目录
- 在可执行jar中动态载入第三方jar(转贴)
- asp调用orcle存储过程(过程中含有输出参数)
- IE样式的expression与XML数据岛绑定有点冲突
- 一位老工程师给年轻工程师的十个忠告
- 开始
- 调用只含有输入参数的过程。(asp,oracle)
- 继续翻译