kNN算法
来源:互联网 发布:数据化管理pdf下载 编辑:程序博客网 时间:2024/06/05 04:50
算法核心包括三点
1.k值的选择
2.距离的度量
3.分类决策规则
下面是c++源代码实现
// k-nearest-neighbour (kNN) classifier demo.
//
// Reads whitespace-separated training and test samples from two text files
// (each row: `fea` feature values followed by one class label), classifies
// every test sample by majority vote among its k nearest training samples
// (Euclidean distance) and prints features, true label and predicted label.

#include "stdafx.h"  // MSVC precompiled header; must remain the first include.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>

// One data row.
//   X      - feature vector
//   label  - class label read from the input file
//   dis    - distance to the most recent query sample (written by knn())
//   result - predicted class label (written by main())
class sample {
public:
    sample() : label(0), dis(0.0), result(0) {}

    std::vector<double> X;
    int label;
    double dis;
    int result;
};

// Strict weak ordering by ascending distance; used to sort training samples.
// Takes const references so it satisfies the std::sort comparator contract.
bool cmp(const sample &s1, const sample &s2) {
    return s1.dis < s2.dis;
}

// Finds the nearest training samples to `newx`.
//
//   newx          - query sample; only its feature vector X is read
//   traindata     - training set; every element's `dis` is overwritten with
//                   its Euclidean distance to `newx`, and the vector is left
//                   sorted by ascending distance
//   nearestsample - its size on entry is k; on return element i holds the
//                   i-th nearest training sample. If fewer than k training
//                   samples exist, it is shrunk to the number available.
void knn(const sample &newx, std::vector<sample> &traindata,
         std::vector<sample> &nearestsample) {
    const std::size_t m = traindata.size();

    // Euclidean distance from the query to every training sample.
    for (std::size_t i = 0; i < m; ++i) {
        const std::vector<double> &x = traindata[i].X;
        // Only compare the dimensions both vectors actually have, so a short
        // row can never cause an out-of-bounds read.
        const std::size_t n = std::min(x.size(), newx.X.size());
        double sum = 0.0;
        for (std::size_t j = 0; j < n; ++j) {
            const double diff = newx.X[j] - x[j];
            sum += diff * diff;
        }
        traindata[i].dis = std::sqrt(sum);
    }

    std::sort(traindata.begin(), traindata.end(), cmp);

    // Never copy more neighbours than there are training samples.
    if (nearestsample.size() > m) {
        nearestsample.resize(m);
    }
    for (std::size_t i = 0; i < nearestsample.size(); ++i) {
        nearestsample[i] = traindata[i];
    }
}

// Returns the index of the largest of the first `n` elements of `a`.
// On ties the lowest index wins. Returns -1 when `n` is not positive.
int max(const int *a, int n) {
    if (n <= 0) {
        return -1;
    }
    int maxindex = 0;
    for (int i = 1; i < n; ++i) {
        if (a[i] > a[maxindex]) {
            maxindex = i;
        }
    }
    return maxindex;
}

// Appends every complete row found in `path` to `out`. A row is `fea`
// feature values followed by one label (parsed as a number and truncated to
// int). Reading stops at the first incomplete or unparsable row, so a
// trailing newline or partial last line never yields a bogus sample.
// Returns false if the file cannot be opened.
static bool load_samples(const char *path, int fea, std::vector<sample> &out) {
    std::ifstream in(path);
    if (!in) {
        return false;
    }
    for (;;) {
        sample row;
        double value = 0.0;
        bool complete = true;
        for (int i = 0; i < fea + 1; ++i) {
            if (!(in >> value)) {
                complete = false;
                break;
            }
            if (i < fea) {
                row.X.push_back(value);
            } else {
                row.label = static_cast<int>(value);
            }
        }
        if (!complete) {
            break;
        }
        out.push_back(row);
    }
    return true;
}

int main() {
    const int fea = 4;                 // number of features per row
    const std::size_t neighbours = 5;  // k
    const char *const trainpath = "D://machineLearning/traindata.txt";
    const char *const testpath = "D://machineLearning/testdata.txt";

    std::vector<sample> traindata, testdata;
    if (!load_samples(trainpath, fea, traindata)) {
        std::cerr << "cannot open " << trainpath << '\n';
        return 1;
    }
    if (!load_samples(testpath, fea, testdata)) {
        std::cerr << "cannot open " << testpath << '\n';
        return 1;
    }
    if (traindata.empty()) {
        std::cerr << "no training samples in " << trainpath << '\n';
        return 1;
    }

    const std::size_t N = testdata.size();
    std::vector<sample> nearestsample(neighbours);
    int label[3] = { 0 };
    const int resultlabel[3] = { 0, 1, 2 };

    for (std::size_t i = 0; i < N; ++i) {
        knn(testdata[i], traindata, nearestsample);
        label[0] = label[1] = label[2] = 0;
        // Classification decision: simple majority vote among the neighbours.
        // Any label other than 0 or 1 is counted as class 2.
        for (std::size_t j = 0; j < nearestsample.size(); ++j) {
            const int neighbourlabel = nearestsample[j].label;
            if (neighbourlabel == 0) {
                ++label[0];
            } else if (neighbourlabel == 1) {
                ++label[1];
            } else {
                ++label[2];
            }
        }
        testdata[i].result = resultlabel[max(label, 3)];
    }

    // One line per test sample: features, true label, predicted label.
    for (std::size_t i = 0; i < N; ++i) {
        for (std::size_t j = 0; j < testdata[i].X.size(); ++j) {
            std::cout << testdata[i].X[j] << " ";
        }
        std::cout << testdata[i].label << " ";
        std::cout << testdata[i].result << " ";
        std::cout << '\n';
    }
    std::cout.flush();  // make sure everything is visible before pausing

    getchar();  // keep the console window open until a key is pressed
    return 0;
}
优点:实现简单,无需显式的训练过程;k 取值较大时对异常值不敏感
缺点:预测时需要计算待分类样本与所有训练样本的距离,计算量大;需要保存全部训练数据,空间复杂度高
参考:
http://blog.csdn.net/mimi9919/article/details/51172095
阅读全文
0 0
- KNN算法
- KNN算法
- KNN算法
- KNN算法
- kNN算法
- KNN算法
- KNN算法
- KNN算法
- KNN算法
- knn算法
- kNN算法
- knn算法
- KNN算法
- KNN算法
- KNN算法
- KNN算法
- kNN算法
- KNN算法
- Tomcat集群session如何共享详解
- 天气查询ajax and xml
- hdu 3605 Escape (最大流+状态压缩)
- 直接插入排序
- CSIC2010网络攻击数据分词处理
- kNN算法
- scala2.11知识总结 1
- Zookeeper以Windows服务安装运行
- 面试之链表
- comm对比两个文件是否相同
- 递归在方法中的应用
- 线性判别分析(LDA)算法
- 18种排序
- JVM基础之类加载