Kmeans++算法C++代码

来源:互联网 发布:cnc加工中心编程代码 编辑:程序博客网 时间:2024/04/27 19:42
算法原理见:http://blog.csdn.net/loadstar_kun/article/details/39450615 。C++ 代码如下:
#include<math.h>
#include<stdio.h>
#include<cstdlib>
#include<ctime>
#include<vector>
#include<iostream>
using namespace std;
// Outcome of a minimum search over a list of distances.
struct Result_Sort{
 int minlocation = 0;  // index of the smallest element (the first one on ties)
 double result = 0.0;  // value of the smallest element
};
// Finds the smallest element of tmp (despite the name, nothing is sorted).
//
// tmp           candidate values, typically the distances from one point to
//               every cluster center.
// Result_Sort1  value handed back unchanged when tmp is empty; otherwise both
//               of its fields are overwritten.
//
// Returns the index and value of the minimum. Ties resolve to the lowest
// index because only a strictly smaller value replaces the current best.
Result_Sort Sort(std::vector<double> tmp, Result_Sort Result_Sort1){
 // Nothing to search: reading tmp[0] would be undefined behaviour.
 if (tmp.empty()){
  return Result_Sort1;
 }
 // Start from element 0 explicitly so a stale minlocation supplied by the
 // caller cannot survive when the minimum is the first element.
 Result_Sort1.minlocation = 0;
 Result_Sort1.result = tmp[0];
 for (std::size_t i = 1; i < tmp.size(); ++i){
  if (Result_Sort1.result > tmp[i]){
   Result_Sort1.result = tmp[i];
   Result_Sort1.minlocation = static_cast<int>(i);
  }
 }
 return Result_Sort1;
}
// Clustering state: one label per input point plus the cluster centers.
struct K_Means_Result{
 std::vector<int> Label;            // Label[i] = index of the cluster owning Input[i]
 std::vector<double> CentralPoint;  // CentralPoint[k] = center of cluster k
};
// Standard k-means on one-dimensional data.
//
// Input            the points to cluster.
// K_Means_Result1  CentralPoint supplies the initial centers (its size is the
//                  number of clusters). Label is resized to Input.size() and
//                  fully overwritten.
//
// Returns the refined centers and the final label of every point. The
// argument is returned untouched when there are no points or no centers.
// At most 30 assign/update rounds are run; the loop stops earlier once a
// round leaves every center exactly where it was, since later rounds would
// then repeat the same result.
K_Means_Result K_Means(std::vector<double> Input, K_Means_Result K_Means_Result1){
 const std::size_t M = Input.size();
 const std::size_t N = K_Means_Result1.CentralPoint.size();
 if (M == 0 || N == 0){
  return K_Means_Result1;
 }
 // Every label is rewritten below, so only the size matters here.
 K_Means_Result1.Label.resize(M);
 std::vector<double> tmpCen(N);  // per-cluster sum of member points
 std::vector<int> Count(N);      // per-cluster member count

 // Maximum number of assign/update rounds.
 const int iteration = 30;
 for (int iter = 0; iter < iteration; ++iter){
  // The accumulators must start from zero every round; carrying them over
  // would turn each center into an average over all previous rounds.
  for (std::size_t kk = 0; kk < N; ++kk){
   tmpCen[kk] = 0.0;
   Count[kk] = 0;
  }
  // Assignment step: attach each point to its nearest center. Ties go to
  // the lowest cluster index because only a strictly smaller distance wins.
  for (std::size_t len = 0; len < M; ++len){
   std::size_t best = 0;
   double bestDist = fabs(K_Means_Result1.CentralPoint[0] - Input[len]);
   for (std::size_t kk = 1; kk < N; ++kk){
    const double d = fabs(K_Means_Result1.CentralPoint[kk] - Input[len]);
    if (bestDist > d){
     bestDist = d;
     best = kk;
    }
   }
   K_Means_Result1.Label[len] = static_cast<int>(best);
   tmpCen[best] += Input[len];
   Count[best] += 1;
  }
  // Update step: move each center to the mean of its members.
  bool moved = false;
  for (std::size_t kk = 0; kk < N; ++kk){
   if (Count[kk] == 0){
    // Empty cluster: keep the previous center rather than computing 0/0.
    continue;
   }
   const double updated = tmpCen[kk] / Count[kk];
   if (updated != K_Means_Result1.CentralPoint[kk]){
    K_Means_Result1.CentralPoint[kk] = updated;
    moved = true;
   }
  }
  if (!moved){
   break;
  }
 }
 return K_Means_Result1;
}
// Seeding state passed into and returned from Init3toK.
struct InitInformation{
 std::vector<int> Label;            // per-point label; -1 is used by the caller for "not a center"
 std::vector<double> Sum;           // Sum[i] = running total of nearest-center distances of points 0..i
 double sum = 0.0;                  // total of all nearest-center distances (equals Sum.back())
 std::vector<double> CentralPoint;  // centers chosen so far
};
// Picks one more initial cluster center and refreshes the sampling weights.
//
// Dist       ignored; kept only so existing call sites keep compiling.
// Input      the points being clustered.
// Point      1-based ordinal of the center being chosen; the selected point
//            receives label Point - 1.
// InitInfor  current seeding state (centers, labels, cumulative distances).
// index      sampling position inside [0, InitInfor.sum]; the first point
//            whose cumulative distance reaches it becomes the new center.
//
// Returns the updated state: the new center appended, the chosen point
// labelled, and Sum / sum recomputed against the enlarged center list.
InitInformation Init3toK(std::vector<double> Dist, std::vector<double> Input, int Point, InitInformation InitInfor, double index){
 (void)Dist;
 const std::size_t pointCount = Input.size();
 // Make sure the label write below cannot run past the end.
 if (InitInfor.Label.size() < pointCount){
  InitInfor.Label.resize(pointCount, -1);
 }
 // Only entries backed by both a point and a cumulative weight are usable.
 const std::size_t usable = InitInfor.Sum.size() < pointCount ? InitInfor.Sum.size() : pointCount;
 if (usable > 0){
  // Clamp so that an index above the total (rounding, or a stale total
  // from the caller) still selects a point instead of silently adding
  // no center at all.
  const double total = InitInfor.Sum[usable - 1];
  if (index > total){
   index = total;
  }
  for (std::size_t i = 0; i < usable; ++i){
   if (InitInfor.Sum[i] >= index){
    InitInfor.CentralPoint.push_back(Input[i]);
    InitInfor.Label[i] = Point - 1;  // label of the Point-th center
    break;
   }
  }
 }
 // Recompute every point's distance to its nearest center and rebuild the
 // cumulative table used for the next draw.
 InitInfor.sum = 0;
 InitInfor.Sum.clear();
 if (InitInfor.CentralPoint.empty()){
  return InitInfor;
 }
 InitInfor.Sum.reserve(pointCount);
 for (std::size_t len = 0; len < pointCount; ++len){
  double nearest = fabs(Input[len] - InitInfor.CentralPoint[0]);
  for (std::size_t kk = 1; kk < InitInfor.CentralPoint.size(); ++kk){
   const double d = fabs(Input[len] - InitInfor.CentralPoint[kk]);
   if (nearest > d){
    nearest = d;
   }
  }
  InitInfor.sum += nearest;
  InitInfor.Sum.push_back(InitInfor.sum);
 }
 return InitInfor;
}
K_Means_Result K_meansplusplus(vector<double> Input, int K){
 //随机选择第一个聚类中心点
 vector<double> CentralPoint;  //K个聚类中心点
 vector<int> Label(Input.size());  //标签表明分类
 srand(time(NULL));
 int ind=(rand() % K);  //0到K-1的整数
 CentralPoint.push_back(Input[ind]);
// cout << "第一个点的位置" << ind +1<< endl;
// cout << "第一个点的数值" << Input[ind] << endl;
 for (int i = 0; i < Input.size(); i++){ Label[i] = -1; }
 Label[ind] = 0; //第一个聚类点的类别
 //对于每一个数据点,计算和该中心点的距离,包括它自身
 vector<double> Dist;
 //需要一个vector<vector<double>> 存储每个点以及所属的类,并未采用
 vector<double> Sum;
 double sum = 0;
 for (int i = 0; i < Input.size(); i++){
  double dist;
  dist = sqrt((Input[i] - CentralPoint[0])*(Input[i] - CentralPoint[0]));//欧式距离
  Dist.push_back(dist);
//  cout << dist << endl;
  sum += dist;
  Sum.push_back(sum);
 }
 //选择一个新的数据点作为新的聚类中心,选择的原则是:D(x)较大的点,被选取作为聚类中心的概率较大
 //第二个点的选择,将所有的距离求和,Dist[]/Sum(Dist),乘随机数概率,最大的点为第二个点
 double index = rand() % (int)sum + (sum - (int)sum)*(rand() % 1000 / float(1000.0));   //找到index处于哪个点的距离区间
 for (int i = 0; i < Sum.size(); i++){
  if (Sum[i]<index && Sum[i + 1]>index)
  {
   CentralPoint.push_back(Input[i]);
//   cout<<"第二个点的位置" << i+1 << endl;
//   cout << "第二个点的数值" << Input[i] << endl;
   Label[i] = 1; //第二个聚类点的类别
   }
 }
 Dist.clear();
 sum = 0;
 Sum.clear();
 vector<double> dist;
 //计算欧氏距离
 for (int i = 0; i < Input.size(); i++){
  for (int j = 0; j < CentralPoint.size(); j++)
  {
   double tmp = sqrt((Input[i] - CentralPoint[j])*(Input[i] - CentralPoint[j]));
   dist.push_back(tmp);
  }
  //找到最小的添加,顺便归类
  Result_Sort Result_Sort1;
  Result_Sort1.minlocation = 0;
  Result_Sort1.result = 0.0;
  Result_Sort1 = Sort(dist, Result_Sort1);  //Pass
  Dist.push_back(Result_Sort1.result);
  dist.clear();
 }
 for (int i = 0; i < Dist.size(); i++){
  sum += Dist[i];
  Sum.push_back(sum);
 }
 //第三个点的选择,第四个点的选择,,,都有所不同,大概递归调用//直至选择K个中心点
 if (K >= 3){
  vector<double> Dist3toK(Input.size());
  for (int i = 3; i <= K; i++){
   double index = rand() % (int)sum + (sum - (int)sum)*(rand() % 1000 / float(1000.0));
   InitInformation InitInfor;
   InitInfor.Label = Label;
   InitInfor.CentralPoint = CentralPoint;
   InitInfor.Sum = Sum;
   InitInfor.sum = sum;
   InitInfor = Init3toK(Dist3toK, Input, i, InitInfor, index); // double 型函数return double,然后这里的return Label返回值并没有覆盖原值,可能只有CentralPoint的值覆盖改变了
   CentralPoint = InitInfor.CentralPoint;
   Label=InitInfor.Label;
  }
 }
 //一维的标准的k-means算法
 K_Means_Result K_Means_Result1;
 K_Means_Result1.CentralPoint = CentralPoint;
 K_Means_Result1.Label = Label;
 K_Means_Result1 = K_Means(Input, K_Means_Result1);
 //输出
 return K_Means_Result1;
}
/*K-means++ 算法,对于一维数据,给定聚类中心数K,自适应的确定初始聚类中心*/ 
int main(){
 double Myarray[20] = { 1.1, 2.2, 3.7, 5.6, 7.9, 9.9, 0.1, 4.6, 8.5, 1.2, 15.4, 15.3, 1.1, 1.6, 9.0, 8.7, 4.4, 5.9, 0.01, 0.0091 };
 vector<double> Input(Myarray, Myarray + 20);
    int K = 4; //K>=2  现在处理4和以上的就崩溃
 K_Means_Result K_meansplusplus(vector<double> Input, int K);//输入数据和聚类数
 K_Means_Result K_Means_Result1;
 K_Means_Result1 = K_meansplusplus(Input, K);
 for (int i = 0; i < K_Means_Result1.Label.size(); i++){cout << "点位置" << Input[i] << "所属 类簇" << K_Means_Result1.Label[i] << endl;}
 for (int i = 0; i < K_Means_Result1.CentralPoint.size(); i++){cout << "类中心点" << K_Means_Result1.CentralPoint[i] << endl;}
 K_Means_Result1.Label.clear();
 K_Means_Result1.CentralPoint.clear();
 char ch = getchar();
 return 0;
}
原创粉丝点击