K-means的简单实现

来源:互联网 发布:先导爱知出现集数 编辑:程序博客网 时间:2024/05/16 07:53

这篇文章中的K-means是简单的实现,没有调初始质心点和K的值,k的值设为3,初始质心点是测试数据的前三个。

并且没有对数据进行归一化处理,只是用原始数据进行聚类。

下面是代码:

#include <iostream>#include<sstream>#include<fstream>#include<vector>#include<math.h>#include<stdlib.h>#define k 3using namespace std;typedef vector<double> Train;int dataNum;int dimNum;int Min(double d1,double d2,double d3){     if(d1<=d2 && d1<=d3)        return 1;     if(d2<=d1 && d2<=d3)        return 2;     if(d3<=d1 && d3<=d2)        return 3;}void getMinDistance(vector<Train>& trains,vector<Train>Cen){   double d1=0;   double d2=0;   double d3=0;   for(int i=0;i<dataNum;i++){    for(int j=1;j<dimNum;j++){        d1 += (trains[i][j]-Cen[0][j])*(trains[i][j]-Cen[0][j]);        d2 += (trains[i][j]-Cen[1][j])*(trains[i][j]-Cen[1][j]);        d3 += (trains[i][j]-Cen[2][j])*(trains[i][j]-Cen[2][j]);    }     switch(Min(d1,d2,d3)){    case 1:        trains[i][0]=1; break;    case 2:        trains[i][0]=2; break;    case 3:        trains[i][0]=3; break;    default :        break;        }    d1=0;d2=0;d3=0;   }}vector<Train> getNewCenter(vector<Train> trains){     //重新计算质心点     vector<Train> newCenter;     Train centerdata1(dimNum+1,0);     Train centerdata2(dimNum+1,0);     Train centerdata3(dimNum+1,0);     centerdata1[0]=1;     centerdata2[0]=2;     centerdata3[0]=3;     int i1,i2,i3;     i1=0;i2=0;i3=0;     for(int i=0;i<dataNum;i++){            int Cases = (int)(trains[i][0]);        switch(Cases){        case 1:            i1++;            for(int j=1;j<dimNum;j++){                centerdata1[j] += trains[i][j];            }            break;        case 2:            i2++;            for(int j=1;j<dimNum;j++){                centerdata2[j] += trains[i][j];            }            break;        case 3:            i3++;            for(int j=1;j<dimNum;j++){                centerdata3[j] += trains[i][j];            }            break;        default:            break;        }     }     for(int j=1;j<dimNum;j++){        centerdata1[j]=centerdata1[j]/i1;        centerdata2[j]=centerdata2[j]/i2;        centerdata3[j]=centerdata3[j]/i3;     }     newCenter.push_back(centerdata1);     newCenter.push_back(centerdata2);     newCenter.push_back(centerdata3);     return newCenter;}vector<Train> Sort(vector<Train>& trains,vector<Train>Cen){    vector<Train>newCenter;    //数据分类    getMinDistance(trains,Cen);    //重新计算质心点    newCenter = getNewCenter(trains);    return newCenter;}void KMeans(vector<Train>& trains){    //确定初始质心点 just use the 1,2,3,data(cannot ensure they are randable)    //the first sort    int isNotChange=1;    //init centers    vector<Train>lastCen;    vector<Train>Centers;    for(int i=0;i<3;i++){   //  Centers.push_back(trains[i]);     lastCen.push_back(trains[i]);    }    while(isNotChange){        Centers = Sort(trains,lastCen);        if(Centers != lastCen ){            lastCen = Centers;        }        else{            isNotChange = 0;        }    }    cout<<"聚类结果:"<<endl;    for(int i=0;i<dataNum;i++){        for(int j=0;j<=dimNum;j++){            cout<<trains[i][j]<<" ";        }        cout<<endl;    }}int main(){   char fname[256];   cout<<"请输入存放数据的文件名:"<<endl;   cin>>fname;   cout<<"维数:"<<endl;   cin>>dimNum;   cout<<"样本数目:"<<endl;   cin>>dataNum;   ifstream infile(fname);   if(!infile){    cout<<"cannot open the choosen file!"<<fname<<endl;    return 0;   }   vector<Train> trains;   for(int i=0;i<dataNum&&!infile.eof();i++){    string str;    getline(infile,str); //read data from infile to str;  only one line    istringstream istr(str); //read data from str to istr;    Train train(dimNum+1,0);    train[0]=i+1;    for(int j=1;j<=dimNum;j++){         istr>>train[j];    }    trains.push_back(train);   }   cout<<"开始聚类"<<endl;   KMeans(trains);   return 0;}


测试结果如下图:

 

第一列即为聚类结果,1表示第一类,2表示第二类,3表示第3类

只是最简单的K-means过程,,

如果有好的调K-means的方法,欢迎提出~

 

 

 

0 0
原创粉丝点击