Kmeans++算法C++代码
来源:互联网 发布:cnc加工中心编程代码 编辑:程序博客网 时间:2024/04/27 19:42
原理见网站 http://blog.csdn.net/loadstar_kun/article/details/39450615 。C++代码如下:
#include<math.h>
#include<stdio.h>
#include<cstdlib>
#include<ctime>
#include<vector>
#include<iostream>
using namespace std;
#include<stdio.h>
#include<cstdlib>
#include<ctime>
#include<vector>
#include<iostream>
using namespace std;
// Result of a minimum search over a list of distances.
struct Result_Sort{
    int minlocation = 0;   // index of the smallest element (used as the cluster id by callers)
    double result = 0.0;   // value of the smallest element
};

// Finds the smallest value in tmp and its position. Despite the name, nothing
// is sorted; callers use this to pick the nearest cluster centre for a point.
//
// tmp           candidate values (distances from one point to every centre).
// Result_Sort1  value returned unchanged when tmp is empty; otherwise both of
//               its fields are overwritten.
// Returns the index of the first minimum and the minimum itself.
Result_Sort Sort(std::vector<double> tmp, Result_Sort Result_Sort1){
    if (tmp.empty()){
        // Nothing to compare: reading tmp[0] would be out of bounds.
        return Result_Sort1;
    }
    // Start from element 0 explicitly so the answer does not depend on what
    // the caller left in minlocation.
    Result_Sort1.minlocation = 0;
    Result_Sort1.result = tmp[0];
    for (std::size_t i = 1; i < tmp.size(); ++i){
        // Strict comparison keeps the first occurrence when values tie.
        if (tmp[i] < Result_Sort1.result){
            Result_Sort1.result = tmp[i];
            Result_Sort1.minlocation = static_cast<int>(i);
        }
    }
    return Result_Sort1;
}
// Output of the one-dimensional clustering routines.
struct K_Means_Result{
    std::vector<int> Label;            // Label[i] is the cluster index assigned to input point i
    std::vector<double> CentralPoint;  // CentralPoint[k] is the centre of cluster k
};

// Standard k-means (Lloyd iterations) on one-dimensional data.
//
// Input            the points to cluster.
// K_Means_Result1  CentralPoint holds the initial centres; Label is resized to
//                  one entry per input point and overwritten.
// Returns the final labels and centres. Runs at most 30 iterations and stops
// early once an assignment pass changes no label. If there are no points or no
// centres the argument is returned unchanged.
K_Means_Result K_Means(std::vector<double> Input, K_Means_Result K_Means_Result1){
    const std::size_t point_count = Input.size();
    const std::size_t cluster_count = K_Means_Result1.CentralPoint.size();
    if (point_count == 0 || cluster_count == 0){
        return K_Means_Result1;
    }
    // Guarantee exactly one label slot per point before indexing into Label.
    K_Means_Result1.Label.resize(point_count, -1);

    std::vector<double> cluster_sum(cluster_count);
    std::vector<int> cluster_size(cluster_count);

    // Maximum number of iterations.
    const int iteration = 30;
    for (int iter = 0; iter < iteration; ++iter){
        // Assignment step: attach every point to its nearest centre.
        bool changed = false;
        for (std::size_t p = 0; p < point_count; ++p){
            int nearest = 0;
            double nearest_dist = fabs(K_Means_Result1.CentralPoint[0] - Input[p]);
            for (std::size_t k = 1; k < cluster_count; ++k){
                const double d = fabs(K_Means_Result1.CentralPoint[k] - Input[p]);
                // Strict comparison: ties go to the lowest cluster index.
                if (d < nearest_dist){
                    nearest_dist = d;
                    nearest = static_cast<int>(k);
                }
            }
            if (K_Means_Result1.Label[p] != nearest){
                K_Means_Result1.Label[p] = nearest;
                changed = true;
            }
        }
        // From the second pass on, unchanged labels mean the centres computed
        // last time are already the means of the current clusters.
        if (iter > 0 && !changed){
            break;
        }

        // Update step: the accumulators must start from zero on every
        // iteration, otherwise centres drift towards an average over all
        // previous assignments.
        for (std::size_t k = 0; k < cluster_count; ++k){
            cluster_sum[k] = 0.0;
            cluster_size[k] = 0;
        }
        for (std::size_t p = 0; p < point_count; ++p){
            const int k = K_Means_Result1.Label[p];
            cluster_sum[k] += Input[p];
            cluster_size[k] += 1;
        }
        for (std::size_t k = 0; k < cluster_count; ++k){
            // An empty cluster keeps its previous centre instead of becoming 0/0 = NaN.
            if (cluster_size[k] > 0){
                K_Means_Result1.CentralPoint[k] = cluster_sum[k] / cluster_size[k];
            }
        }
    }
    return K_Means_Result1;
}
// Seeding state passed into and returned from Init3toK.
struct InitInformation{
    std::vector<int> Label;            // per-point cluster label; -1 where no label has been set
    std::vector<double> Sum;           // Sum[i] = running total of nearest-centre distances for points 0..i
    double sum = 0.0;                  // total of all nearest-centre distances (last entry of Sum)
    std::vector<double> CentralPoint;  // centres chosen so far
};

// Adds one more initial cluster centre (written for the third and later
// centres, when the cluster count K >= 3) and refreshes the sampling state.
//
// Dist       ignored; the original cleared it on entry and used it only as
//            scratch space. Kept so existing calls still compile.
// Input      the data points.
// Point      1-based ordinal of the centre being added; the chosen point is
//            labelled Point - 1.
// InitInfor  current labels, centres and cumulative distances.
// index      random draw in the range of the cumulative distances; the first
//            point whose cumulative distance is >= index becomes the new centre.
// Returns InitInfor with the new centre appended, the chosen point labelled,
// and Sum / sum recomputed against the enlarged set of centres.
InitInformation Init3toK(std::vector<double> Dist, std::vector<double> Input, int Point, InitInformation InitInfor, double index){
    (void)Dist;

    // Only positions valid in both Sum and Input can be selected.
    const std::size_t selectable =
        InitInfor.Sum.size() < Input.size() ? InitInfor.Sum.size() : Input.size();
    if (selectable > 0){
        // Fallback: if index lies beyond every cumulative value, take the last
        // point so that a centre is always added.
        std::size_t chosen = selectable - 1;
        for (std::size_t i = 0; i < selectable; ++i){
            if (InitInfor.Sum[i] >= index){
                chosen = i;
                break;
            }
        }
        InitInfor.CentralPoint.push_back(Input[chosen]);
        if (chosen < InitInfor.Label.size()){
            InitInfor.Label[chosen] = Point - 1;  // label of the Point-th centre
        }
    }

    // Recompute every point's distance to its nearest centre and rebuild the
    // cumulative totals used for the next draw.
    InitInfor.sum = 0.0;
    InitInfor.Sum.clear();
    InitInfor.Sum.reserve(Input.size());
    for (std::size_t p = 0; p < Input.size(); ++p){
        double nearest = 0.0;  // stays 0 when there are no centres at all
        for (std::size_t c = 0; c < InitInfor.CentralPoint.size(); ++c){
            const double d = fabs(Input[p] - InitInfor.CentralPoint[c]);
            if (c == 0 || d < nearest){
                nearest = d;
            }
        }
        InitInfor.sum += nearest;
        InitInfor.Sum.push_back(InitInfor.sum);
    }
    return InitInfor;
}
// K-means++ for one-dimensional data: picks K initial centres by
// distance-weighted random sampling, then refines them with K_Means.
//
// Input  the points to cluster.
// K      requested number of clusters; values above the number of points are
//        reduced to the number of points.
// Returns the labels and centres produced by K_Means. An empty result is
// returned when Input is empty or K <= 0.
//
// Side effect: reseeds the C random generator from the current time on every
// call, as the original did.
K_Means_Result K_meansplusplus(std::vector<double> Input, int K){
    K_Means_Result K_Means_Result1;
    if (Input.empty() || K <= 0){
        return K_Means_Result1;
    }
    const int point_count = static_cast<int>(Input.size());
    if (K > point_count){
        K = point_count;
    }

    // First centre: uniform over ALL points (not just the first K of them).
    srand(static_cast<unsigned int>(time(NULL)));
    const int first = rand() % point_count;

    InitInformation InitInfor;
    InitInfor.Label.assign(Input.size(), -1);
    InitInfor.Label[first] = 0;  // label of the first centre
    InitInfor.CentralPoint.push_back(Input[first]);

    // Cumulative distance of every point (including the centre itself) to the
    // first centre; a point's share of the total is its chance of being drawn.
    InitInfor.sum = 0.0;
    InitInfor.Sum.reserve(Input.size());
    for (std::size_t i = 0; i < Input.size(); ++i){
        InitInfor.sum += fabs(Input[i] - Input[first]);
        InitInfor.Sum.push_back(InitInfor.sum);
    }

    // Remaining centres: points with a larger distance D(x) to their nearest
    // centre are more likely to be chosen. Init3toK returns the refreshed
    // Sum / sum, which are carried into the next draw instead of reusing stale
    // totals.
    std::vector<double> scratch;
    for (int point = 2; point <= K; ++point){
        // Uniform draw in [0, sum). Scaling a fraction avoids the integer
        // modulo, which divides by zero whenever sum < 1.
        const double index = InitInfor.sum * (rand() / (RAND_MAX + 1.0));
        InitInfor = Init3toK(scratch, Input, point, InitInfor, index);
    }

    // Standard one-dimensional k-means starting from the seeded centres.
    K_Means_Result1.CentralPoint = InitInfor.CentralPoint;
    K_Means_Result1.Label = InitInfor.Label;
    return K_Means(Input, K_Means_Result1);
}
/*K-means++ 算法,对于一维数据,给定聚类中心数K,自适应的确定初始聚类中心*/
int main(){
double Myarray[20] = { 1.1, 2.2, 3.7, 5.6, 7.9, 9.9, 0.1, 4.6, 8.5, 1.2, 15.4, 15.3, 1.1, 1.6, 9.0, 8.7, 4.4, 5.9, 0.01, 0.0091 };
vector<double> Input(Myarray, Myarray + 20);
int K = 4; //K>=2 现在处理4和以上的就崩溃
K_Means_Result K_meansplusplus(vector<double> Input, int K);//输入数据和聚类数
K_Means_Result K_Means_Result1;
K_Means_Result1 = K_meansplusplus(Input, K);
for (int i = 0; i < K_Means_Result1.Label.size(); i++){cout << "点位置" << Input[i] << "所属 类簇" << K_Means_Result1.Label[i] << endl;}
for (int i = 0; i < K_Means_Result1.CentralPoint.size(); i++){cout << "类中心点" << K_Means_Result1.CentralPoint[i] << endl;}
K_Means_Result1.Label.clear();
K_Means_Result1.CentralPoint.clear();
char ch = getchar();
return 0;
}
int main(){
double Myarray[20] = { 1.1, 2.2, 3.7, 5.6, 7.9, 9.9, 0.1, 4.6, 8.5, 1.2, 15.4, 15.3, 1.1, 1.6, 9.0, 8.7, 4.4, 5.9, 0.01, 0.0091 };
vector<double> Input(Myarray, Myarray + 20);
int K = 4; //K>=2 现在处理4和以上的就崩溃
K_Means_Result K_meansplusplus(vector<double> Input, int K);//输入数据和聚类数
K_Means_Result K_Means_Result1;
K_Means_Result1 = K_meansplusplus(Input, K);
for (int i = 0; i < K_Means_Result1.Label.size(); i++){cout << "点位置" << Input[i] << "所属 类簇" << K_Means_Result1.Label[i] << endl;}
for (int i = 0; i < K_Means_Result1.CentralPoint.size(); i++){cout << "类中心点" << K_Means_Result1.CentralPoint[i] << endl;}
K_Means_Result1.Label.clear();
K_Means_Result1.CentralPoint.clear();
char ch = getchar();
return 0;
}
阅读全文
0 0
- Kmeans算法java代码
- Kmeans++算法C++代码
- kmeans算法及python代码参考
- kmeans算法及python代码参考
- 2、机器学习算法KMeans -- Java代码
- java代码 kmeans算法实现 图像分割
- KMeans 算法
- Kmeans算法
- kmeans算法
- Kmeans算法
- Kmeans算法
- kmeans++算法
- Kmeans算法
- 数据挖掘--kmeans聚类算法mapreduce实现代码<转>
- 数据挖掘--kmeans聚类算法mapreduce实现 代码
- 数据挖掘--kmeans聚类算法mapreduce实现 代码
- delphi pascal 写的 fcm kmeans 模糊C-均值算法
- 用c语言写的kmeans算法,不是很完善
- 半年全球网络安全入侵事件近千起,超19亿数据受影响
- jenkins将svn用户名和邮件地址映射的方法
- Spring/Boot/Cloud系列知识(4)——代理模式(下)
- eclipse查看一个方法被谁引用(调用)的快捷键四种方式
- 【资源共享】《Rockchip 量产烧录 指南 V1.0》
- Kmeans++算法C++代码
- 第四周项目1—建立单链表
- SlidingMenu侧拉界面
- git-windows安装
- php无限级分类实现评论及回复
- 面向对象设计原则—高内聚、低耦合。多聚合、少继承
- 实现select标签不通过ctrl实现多选,通过点击实现多选
- 数据结构实验一线性表的基本操作实现及其应用
- jn项目-解决前台中文参数传到后台乱码问题