使用K-means算法对IRIS数据集进行聚类的C++实现

来源:互联网 发布:y系列电机下线数据大全 编辑:程序博客网 时间:2024/04/29 09:42

使用K-means算法对IRIS数据集进行聚类的C++实现

参考:http://blog.csdn.net/cai0538/article/details/7061922

#include <iostream>
#include <fstream>
#include <vector>
#include <math.h>
#include <string>
#include <sstream>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <cmath>
#include <exception>

using namespace std;

/// Number of clusters to build.
constexpr int k = 3;

/// One raw record as read from the text file: five comma-separated string fields.
struct Tuple {
    string attr1;
    string attr2;
    string attr3;
    string attr4;
    string attr5;
};

/// One parsed sample: four numeric features plus the trailing text field.
struct Tuple1 {
    float attr1 = 0;
    float attr2 = 0;
    float attr3 = 0;
    float attr4 = 0;
    string attr5;
};

/// Returns the Euclidean distance between two samples over the four numeric features.
float getDisXY(const Tuple1& t1, const Tuple1& t2) {
    const float d1 = t1.attr1 - t2.attr1;
    const float d2 = t1.attr2 - t2.attr2;
    const float d3 = t1.attr3 - t2.attr3;
    const float d4 = t1.attr4 - t2.attr4;
    return std::sqrt(d1 * d1 + d2 * d2 + d3 * d3 + d4 * d4);
}

/// Returns the index (0..k-1) of the centroid in `means` closest to `tuple1`.
/// Ties are resolved in favour of the lowest index.
int clusterOfTuple1(const Tuple1 means[], const Tuple1& tuple1) {
    int label = 0;
    float best = getDisXY(means[0], tuple1);
    for (int i = 1; i < k; i++) {
        const float dis = getDisXY(means[i], tuple1);
        if (dis < best) {
            label = i;
            best = dis;
        }
    }
    return label;
}

/// Returns the clustering error: the sum, over all clusters, of the Euclidean
/// distance from each member to its cluster's centroid.
/// Note: distances are summed as-is (not squared).
float getVar(const Tuple1 means[], const vector<Tuple1> cluster[]) {
    float var = 0;
    for (int i = 0; i < k; i++) {
        for (const Tuple1& sample : cluster[i]) {
            var = var + getDisXY(means[i], sample);
        }
    }
    return var;
}

/// Returns the centroid (per-feature arithmetic mean) of `cluster`.
/// For an empty cluster a zero-valued sample is returned instead of dividing
/// by zero; callers that want to keep a previous centroid must check for
/// emptiness themselves.
Tuple1 getMeans(const vector<Tuple1>& cluster) {
    Tuple1 t;
    if (cluster.empty()) {
        return t;
    }
    float sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
    for (const Tuple1& sample : cluster) {
        sum1 += sample.attr1;
        sum2 += sample.attr2;
        sum3 += sample.attr3;
        sum4 += sample.attr4;
    }
    const int num = static_cast<int>(cluster.size());
    t.attr1 = sum1 / num;
    t.attr2 = sum2 / num;
    t.attr3 = sum3 / num;
    t.attr4 = sum4 / num;
    return t;
}

/// Empties every cluster and re-assigns each sample to its nearest centroid.
static void assignToClusters(const Tuple1 means[], const vector<Tuple1>& samples,
                             vector<Tuple1> cluster[]) {
    for (int i = 0; i < k; i++) {
        cluster[i].clear();
    }
    for (const Tuple1& sample : samples) {
        cluster[clusterOfTuple1(means, sample)].push_back(sample);
    }
}

/// Prints every cluster: a header line, one line per member, then the member count.
static void printClusters(const vector<Tuple1> cluster[]) {
    for (int i = 0; i < k; i++) {
        cout << "第" << i << "簇" << endl;
        for (const Tuple1& sample : cluster[i]) {
            cout << sample.attr1 << " " << sample.attr2 << " " << sample.attr3 << " "
                 << sample.attr4 << " " << endl;
        }
        cout << cluster[i].size() << endl;
    }
}

/// Runs K-means with k clusters on `tuples1` and prints the initial seeds,
/// the initial clustering, the iteration count, the final clustering and the
/// final error to standard output.
/// Does nothing (apart from an error message) when there are fewer samples
/// than clusters.
void Kmeans(const vector<Tuple1>& tuples1) {
    const int t_num = static_cast<int>(tuples1.size());
    if (t_num < k) {
        // Also protects `rand() % t_num` below from a division by zero.
        cerr << "样本数量少于簇的数量,无法聚类" << endl;
        return;
    }

    vector<Tuple1> cluster[k];
    Tuple1 means[k];

    // Seed the centroids with k distinct, randomly chosen samples; repeated
    // indices would start with identical centroids and leave clusters empty.
    int rand_num[k];
    srand((unsigned)time(NULL));
    for (int i = 0; i < k; i++) {
        int candidate;
        do {
            candidate = rand() % t_num;
        } while (find(rand_num, rand_num + i, candidate) != rand_num + i);
        rand_num[i] = candidate;
        means[i] = tuples1[candidate];
        cout << candidate << '\n' << endl;
    }

    // Initial assignment based on the seed centroids.
    assignToClusters(means, tuples1, cluster);

    // Print the initial clustering.
    printClusters(cluster);

    // Iterate until the error changes by less than 0.0001 between two rounds.
    float oldVar = -1;
    float newVar = getVar(means, cluster);
    int i_num = 0;
    cout << "输出初始误差" << newVar << endl;
    // std::fabs: a bare abs() may bind to the int overload and truncate to 0.
    while (std::fabs(oldVar - newVar) >= 0.0001) {
        // Update each centroid; an empty cluster keeps its previous centroid
        // (distinct seed indices can still refer to identical samples).
        for (int i = 0; i < k; i++) {
            if (!cluster[i].empty()) {
                means[i] = getMeans(cluster[i]);
            }
        }
        oldVar = newVar;
        newVar = getVar(means, cluster);

        // Rebuild the clusters around the updated centroids.
        assignToClusters(means, tuples1, cluster);
        i_num++;
    }
    cout << "i_num:" << i_num << endl;

    // Print the clustering after learning.
    cout << "学习后的分类" << endl;
    printClusters(cluster);
    cout << "输出误差:" << newVar << endl;
}

/// Splits one "a,b,c,d,label" line into its raw string fields.
/// Returns false when fewer than four comma-separated fields are present.
/// A missing trailing label is accepted and stored as an empty string.
static bool splitRecord(const string& line, Tuple& out) {
    istringstream fields(line);
    if (!getline(fields, out.attr1, ',') || !getline(fields, out.attr2, ',') ||
        !getline(fields, out.attr3, ',') || !getline(fields, out.attr4, ',')) {
        return false;
    }
    if (!getline(fields, out.attr5)) {
        out.attr5.clear();
    }
    return true;
}

/// Converts a raw record into a numeric sample.
/// Returns false when one of the four numeric fields cannot be parsed.
static bool toTuple1(const Tuple& raw, Tuple1& out) {
    try {
        out.attr1 = stof(raw.attr1);
        out.attr2 = stof(raw.attr2);
        out.attr3 = stof(raw.attr3);
        out.attr4 = stof(raw.attr4);
    } catch (const std::exception&) {
        return false;  // stof throws on non-numeric or out-of-range text
    }
    out.attr5 = raw.attr5;
    return true;
}

/// Reads comma-separated samples from a text file, echoes them and clusters them.
/// The file path is taken from the first command-line argument and defaults
/// to "iris.txt". Returns 1 when the file cannot be opened, 0 otherwise.
int main(int argc, char* argv[]) {
    const string fname = (argc > 1) ? argv[1] : "iris.txt";

    ifstream infile(fname.c_str(), ios::in);
    if (!infile) {
        cout << "文件打开错误" << endl;
        return 1;
    }

    // Read the file line by line; looping on eof() would process a phantom
    // record after the last line and feed empty strings to stof.
    vector<Tuple> tuples;
    string line;
    while (getline(infile, line)) {
        // Drop a trailing carriage return left by Windows line endings.
        if (!line.empty() && line[line.size() - 1] == '\r') {
            line.erase(line.size() - 1);
        }
        if (line.empty()) {
            continue;
        }
        Tuple raw;
        if (splitRecord(line, raw)) {
            tuples.push_back(raw);
        } else {
            cerr << "skipping malformed line: " << line << endl;
        }
    }

    // Convert the raw string fields into numeric samples.
    vector<Tuple1> tuples1;
    tuples1.reserve(tuples.size());
    for (const Tuple& raw : tuples) {
        Tuple1 sample;
        if (toTuple1(raw, sample)) {
            tuples1.push_back(sample);
        } else {
            cerr << "skipping non-numeric record: " << raw.attr1 << "," << raw.attr2 << ","
                 << raw.attr3 << "," << raw.attr4 << "," << raw.attr5 << endl;
        }
    }

    // Echo the samples that were read from the file.
    for (const Tuple1& sample : tuples1) {
        cout << sample.attr1 << "," << sample.attr2 << "," << sample.attr3 << ","
             << sample.attr4 << "," << sample.attr5 << endl;
    }

    Kmeans(tuples1);
    return 0;
}


0 0
原创粉丝点击