kmeans聚类算法

来源:互联网 发布:网络中央控制主机 编辑:程序博客网 时间:2024/05/09 16:43

给定训练数据集,使用 k-means 聚类算法对其进行聚类,并利用 OpenCV 以图形化界面显示聚类结果。

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>

#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <vector>

#include <math.h>
#include <stdlib.h>
#include <time.h>

using namespace cv;
using namespace std;

// Number of clusters. A typed constant instead of a macro, so the one-letter
// name cannot textually clobber unrelated identifiers.
const int k = 2;

/**
 * Reads whitespace-separated "x y" pairs from a text file.
 *
 * Each coordinate is mapped to (value + 4) * 100 so the samples can be drawn
 * directly as pixel positions by print().
 *
 * @param filePath path of the text file to read.
 * @return every complete pair found, in file order; an empty vector when the
 *         file cannot be opened (an error message is printed in that case).
 */
vector<Point2f> readFile(const string& filePath) {
    vector<Point2f> result;

    ifstream fileIn(filePath.c_str(), ios_base::in);
    if (!fileIn.is_open()) {
        cout << "读取文件失败" << endl;
        return result;
    }

    // Stop at the first failed extraction, so a short or malformed file never
    // produces bogus samples.
    double x = 0.0;
    double y = 0.0;
    while (fileIn >> x >> y) {
        // Scale the data up so it is convenient to display graphically.
        result.push_back(Point2f(static_cast<float>((x + 4) * 100),
                                 static_cast<float>((y + 4) * 100)));
    }
    return result;
}

/**
 * Forgy initialisation ("frogy" is the original spelling, kept for callers):
 * picks k samples at distinct random indices as the initial mean points.
 *
 * @param data  the samples to choose from.
 * @param means output array with room for k points. Left untouched when data
 *              holds fewer than k samples, because k distinct indices cannot
 *              be drawn in that case.
 */
void frogy(const vector<Point2f>& data, Point2f* means) {
    if (data.size() < static_cast<size_t>(k)) {
        return;
    }

    // Seed the random number generator.
    srand(static_cast<unsigned>(time(nullptr)));

    size_t chosen[k];
    for (int i = 0; i < k; i++) {
        bool duplicate;
        do {
            duplicate = false;
            chosen[i] = static_cast<size_t>(rand()) % data.size();
            // Avoid selecting the same sample index twice.
            for (int j = 0; j < i; j++) {
                if (chosen[i] == chosen[j]) {
                    duplicate = true;
                }
            }
        } while (duplicate);
        means[i] = data[chosen[i]];
    }
}

/**
 * Euclidean distance between two points.
 *
 * Despite its name this is not a within-cluster sum of squares; it returns
 * the square root of the squared coordinate differences.
 */
double WCSS(Point2f data1, Point2f data2) {
    const double dx = static_cast<double>(data1.x) - data2.x;
    const double dy = static_cast<double>(data1.y) - data2.y;
    return sqrt(dx * dx + dy * dy);
}

/**
 * Returns the index (0 .. k-1) of the mean point closest to `data`.
 * Ties go to the lowest index.
 */
int classifyCluster(Point2f* means, Point2f data) {
    int nearest = 0;
    double best = numeric_limits<double>::max();
    for (int i = 0; i < k; i++) {
        const double dist = WCSS(means[i], data);
        if (dist < best) {
            nearest = i;
            best = dist;
        }
    }
    return nearest;
}

/**
 * Computes the centroid (arithmetic mean) of a cluster.
 *
 * @return the centroid, or (0, 0) for an empty cluster so that no division by
 *         zero occurs; callers that care should test for emptiness first.
 */
Point2f getNewMeans(const vector<Point2f>& cluster) {
    if (cluster.empty()) {
        return Point2f(0, 0);
    }

    double sumX = 0.0;
    double sumY = 0.0;
    for (size_t i = 0; i < cluster.size(); i++) {
        sumX += cluster[i].x;
        sumY += cluster[i].y;
    }
    const double count = static_cast<double>(cluster.size());
    return Point2f(static_cast<float>(sumX / count),
                   static_cast<float>(sumY / count));
}

/**
 * Draws the k clusters and their mean points on an 800x800 black image, shows
 * it in the "result" window and blocks until a key is pressed.
 *
 * @param clusters array of k clusters.
 * @param means    array of k mean points (drawn in red, slightly larger).
 */
void print(vector<Point2f>* clusters, Point2f* means) {
    const Scalar colorTab[] = {
        Scalar(0, 0, 255),      // red (reserved for the mean points)
        Scalar(0, 255, 0),      // green
        Scalar(255, 100, 100),  // blue
        Scalar(0, 255, 255),    // yellow
        Scalar(255, 0, 255),    // purple
        Scalar(255, 255, 0),    // light blue
        Scalar(255, 255, 255),  // white
    };
    const int clusterColors =
        static_cast<int>(sizeof(colorTab) / sizeof(colorTab[0])) - 1;

    Mat img(800, 800, CV_8UC3);
    img = Scalar::all(0);

    for (int j = 0; j < k; j++) {
        // Entry 0 is the mean colour; wrap around the remaining entries so a
        // larger k cannot index past the end of the table.
        const Scalar& color = colorTab[1 + j % clusterColors];
        for (size_t i = 0; i < clusters[j].size(); i++) {
            circle(img, clusters[j][i], 2, color, CV_FILLED, CV_AA);
        }
        circle(img, means[j], 4, colorTab[0], CV_FILLED, CV_AA);
    }

    namedWindow("result");
    imshow("result", img);
    waitKey(0);  // 0 = wait indefinitely for a key press
}

// Rebuilds all k clusters by assigning every sample to its nearest mean.
static void assignClusters(const vector<Point2f>& data, Point2f* means,
                           vector<Point2f>* clusters) {
    for (int i = 0; i < k; i++) {
        clusters[i].clear();
    }
    for (size_t i = 0; i < data.size(); i++) {
        clusters[classifyCluster(means, data[i])].push_back(data[i]);
    }
}

/**
 * Runs k-means on the samples in a text file and displays every iteration.
 *
 * The input path may be given as the first command-line argument; otherwise
 * the original hard-coded location is used. Each displayed frame blocks until
 * a key is pressed.
 *
 * @return 0 on success, 1 when there are too few samples to cluster.
 */
int main(int argc, char** argv) {
    vector<Point2f> clusters[k];  // the k clusters
    Point2f means[k];             // the k mean points

    const string filePath =
        (argc > 1) ? argv[1]
                   : "C:\\Users\\Administrator\\Desktop\\kMeansData.txt";

    // Read all samples into data.
    const vector<Point2f> data = readFile(filePath);
    if (data.size() < static_cast<size_t>(k)) {
        cout << "样本数量不足,无法聚类" << endl;
        return 1;
    }

    // 1. Forgy method: randomly choose k samples as the initial mean points.
    frogy(data, means);

    // 2. Assign every sample to its nearest mean and show the initial state.
    assignClusters(data, means, clusters);
    print(clusters, means);

    // 3. Alternate between recomputing the means and reassigning the samples
    //    until no mean moves by more than one pixel.
    bool moved = true;
    while (moved) {
        moved = false;

        // Recompute the mean points.
        for (int i = 0; i < k; i++) {
            if (clusters[i].empty()) {
                continue;  // keep the previous mean rather than a bogus one
            }
            const Point2f updated = getNewMeans(clusters[i]);
            if (WCSS(updated, means[i]) > 1) {
                moved = true;
            }
            means[i] = updated;
        }

        // Obtain the new clusters from the new means and display them.
        assignClusters(data, means, clusters);
        print(clusters, means);
    }

    return 0;
}


0 0