动态聚类中 C-均值算法 (K-均值算法)的C++实现

本文给出了 C-均值算法 的 C++  实现。

(算法描述参见边肇祺、张学工等编著《模式识别》第 237 页,清华大学出版社。)




2.1  头文件 (cmean.h)


#pragma once

#include <list>

#include <vector>

using namespace std;

#define DATANUM 19

#define MAXDIST 333333


// A two-dimensional integer sample point. The same type is also used to
// store the mean (centre) of a class.
struct CData
{
    int x1;  // first coordinate
    int x2;  // second coordinate
};




class CCMean



    CCMean(CData *pdata);


    void init(); 

    void work(int InitClassNum);



    // calculate the mean of class i:

    void CalcuMean( int i );


    // calculate the ERROR of class i:

    void CalcuJc(int i);

    void CalcuJe();


    // step 1 of C-Mean algorithm

    void InitDeploy();


    // step 4 and 5 of C-Mean algorithm

    // da is now in class i,

    // return ture when moving da from class to class k

    // return false when we do not move da

    bool MoveItoK( const CData& da, int i, int &k );


    // calculate the distance of to data:

    int  dist( const CData& mean, const CData& da);


    // print result:

    void OutPut();



    // iClassNum is the initial class number, in text book, iClassNum <==> C

    int iClassNum;


    // pointer to data array:

    CData *pData;


    // store the mean of all classes. just ueses 0 to iClassNum - 1;

    // in text book is: m1, m2, m3, m4, ... , mc.

    CData mean[DATANUM];


    // store the ERROR of each class, just ueses 0 to iClassNum - 1;

    // the sum of jc[0] to jc[iClassNum - 1] will be je defined following jc;

    int jc[DATANUM];


    //the sum of jc[0] to jc[iClassNum - 1]

    int je;


    // pcla[i] pointer class i which store in LIST

    list< CData >* pcla[DATANUM];




2.2  实现文件



#include "assert.h"

#include <stdio.h>

#include "cmean.h"


// Remember the caller's sample array (it is not copied) and allocate one
// empty sample list for each of the DATANUM possible classes. All numeric
// state starts at zero so that no member is ever read uninitialized.
CCMean::CCMean(CData *pdata)
{
    pData = pdata;
    iClassNum = 0;
    je = 0;

    for (int i = 0; i < DATANUM; ++i)
    {
        // operator new reports failure by throwing, so no null check is needed.
        pcla[i] = new std::list< CData >;

        mean[i].x1 = 0;
        mean[i].x2 = 0;
        jc[i] = 0;
    }
}

// Free every per-class list allocated by the constructor.
CCMean::~CCMean()
{
    for (int i = 0; i < DATANUM; ++i)
    {
        delete pcla[i];
    }
}


void CCMean::init()


    for(int i = 0; i < DATANUM; i ++ )



       mean[i].x1 = 0;

       mean[i].x2 = 0;


    je = 0;



void CCMean::CalcuMean(int ii)


    int sum1 = 0, sum2 = 0;

    int si = (int)pcla[ii]->size();


    list< CData >::iterator iter = pcla[ii]->begin();

    for(int i = 0; i < si; i ++ )


       sum1 += iter->x1;

       sum2 += iter->x2;



    mean[ii].x1 = sum1 / si;

    mean[ii].x2 = sum2 / si;



void CCMean::CalcuJe()


    for( int i = 0; i < iClassNum ; i ++ )


       CalcuJc( i );

       je += jc[i];




void CCMean::CalcuJc( int index )


    list< CData >::iterator iter = pcla[index]->begin();

    int si = (int)pcla[index]->size();



    jc[index] = 0;

    for( int i = 0; i < si; i ++)


       jc[index] += dist( mean[index], *iter );

       iter ++;




int CCMean::dist(const CData& mean, const CData& da)


    return (mean.x1 - da.x1)*(mean.x1 - da.x1) + (mean.x2 - da.x2)*(mean.x2 - da.x2);



void CCMean::InitDeploy()


    CData *ptem = pData;

    for( int i = 0; i < iClassNum; i ++ )


       // choose the first iClassNum data as our initial class-center:

       mean[i] = *ptem;

       pcla[i]->push_back( *ptem );




    // put other data to our initial classes:

    for( int i = iClassNum; i < DATANUM; i ++ )


       int mindis = MAXDIST;

       int pos = 0;


       // get the least distance between pData[i] and m1, m2, m3 ....

       for( int j = 0; j < iClassNum; j ++ )


           int curdis = dist( pData[i], mean[j] );

           if( curdis < mindis )


              mindis = curdis;

              pos = j;



       // add pData to class (pos):

       pcla[pos]->push_back( pData[i] );



    for( int i = 0; i < iClassNum ; i ++ )

       CalcuMean( i );






bool CCMean::MoveItoK( const CData &da, int i , int& k )


    // now da is in class i,if da is moved to another class, return true, else return false

    int Pk = MAXDIST;

    int Pj = 0;

    int temk = 0;

    for( int j = 0; j < iClassNum; j ++ )


       int si = (int)pcla[j]->size();

       if( j == i )

           Pj = dist( mean[j], da ) * si/(si - 1);


           Pj = dist( mean[j], da ) * si/(si + 1);

       if( Pj < Pk ) 


           Pk = Pj;

           temk = j;


       else if ( Pj == Pk  && j == i )


           // when Pj == Pk && j == i, we do not move (da) from class i to class j

           temk = i;




    if( i == temk )

       return false; // we do NOT move da;


    k = temk;

    // add da to class k:

    pcla[k]->push_back( da );


    // delete da from class i, first find the positon of da in class i:

    list< CData >::iterator iter = pcla[i]->begin();

    while( iter != pcla[i]->end() )


       if( iter->x1 == da.x1 && iter->x2 == da.x2 )





    // now delete da from class i:

    pcla[i]->erase( iter );


    // we have move da from class i to class k;

    return true;



void CCMean::OutPut()


    for( int i = 0; i < iClassNum ; i ++ )


       printf("class %d:/n", i );

       list< CData >::iterator iter = pcla[i]->begin();

       int j = 1;

       while( iter != pcla[i]->end() )


           printf( "(%d,  %d)       ", iter->x1, iter->x2 );

           iter ++;

           if( j++ % 5 == 0)







void CCMean::work(int InitClassNum)


    iClassNum = InitClassNum;


    // step 1 of C-Mean algorithm



    int counter = 0;



    // step 2 of C-Mean algorithm: choose one sample y (here is da) from collection

    for( int i = 0; i < iClassNum ; i ++ )


       // step 3 of C-Mean algorithm:

       int si = (int)pcla[i]->size();

       if( si == 1 )



       // step 4 of C-Mean algorithm:

       list< CData >::iterator iter = pcla[i]->begin();

       for(int j = 0; j < (int)pcla[i]->size(); j++)


           int k = 0;

           CData da = *iter;

           iter ++;


           // step 5 of C-Mean algorithm:

           if( MoveItoK( da , i, k ) == true )


              // step 6 of C-Mean algorithm:

              int OldJe = je;

              je -= jc[i];

              je -= jc[k];


              CalcuMean( i );

              CalcuMean( k );


              CalcuJc( i );

              CalcuJc( k );


              je += jc[i];

              je += jc[k];

              if( OldJe > je )


                  counter = 0;

                    goto Again;




           // step 7 of C-Mean algorithm:

           if( counter == DATANUM )

              goto end;




    printf(" current Je is: %d/n", je );




2.3  测试文件


#include "CMean.h"

#include "process.h"


// The DATANUM sample points to be clustered.
// NOTE(review): the brace-enclosed initializer list that must follow this
// line is missing from this listing; restore the original sample
// coordinates (and the terminating ';') before compiling.
CData yy[DATANUM] =







int main(int argc, char* argv[])


    CCMean cmean( yy );




    return 0;

