kNN算法

来源:互联网 发布:数据化管理pdf下载 编辑:程序博客网 时间:2024/06/05 04:50

算法核心包括三点
1.k值的选择
2.距离的度量
3.分类决策规则
下面是 C++ 源代码实现:

// k-nearest-neighbour (kNN) classifier demo.
//
// Reads whitespace-separated training and test rows (kFeatureCount feature
// values followed by one class label), classifies every test row by a
// majority vote among its kNeighbourCount nearest training rows (Euclidean
// distance) and prints "features... label predicted" for each test row.

#include "stdafx.h"  // project header kept from the original (presumably the Visual Studio precompiled header)

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;

// Number of feature columns in each data row (the label is one extra column).
static const int kFeatureCount = 4;
// k: how many nearest neighbours take part in the vote.
static const int kNeighbourCount = 5;
// Number of vote buckets; labels 0 and 1 have their own bucket, every other
// label value is counted in the last bucket.
static const int kClassCount = 3;

// One data row plus the working fields used during classification.
class sample {
public:
    sample() : label(0), dis(0.0), result(0) {}

    vector<double> X;  // feature vector
    int label;         // class label read from the data file
    double dis;        // distance to the most recent query; written by knn()
    int result;        // predicted label; written by main()
};

// Strict-weak ordering used for sorting: ascending by distance.
bool cmp(const sample &s1, const sample &s2) {
    return s1.dis < s2.dis;
}

// Finds the nearest training samples to `newx`.
//
// newx          - query sample; must have at least as many features as
//                 traindata[0].
// traindata     - training set. Side effects: `dis` of every element is
//                 overwritten with its Euclidean distance to `newx`, and the
//                 vector is re-ordered ascending by that distance.
// nearestsample - its size on entry is the requested k. On return it holds
//                 the k closest training samples, nearest first. If the
//                 training set has fewer than k samples it is shrunk to the
//                 training set size (to empty if there is no training data).
void knn(const sample &newx, vector<sample> &traindata, vector<sample> &nearestsample) {
    typedef vector<sample>::size_type size_type;

    const size_type m = traindata.size();
    if (m == 0) {
        // Nothing to compare against; traindata[0] below would be out of range.
        nearestsample.clear();
        return;
    }

    const vector<double>::size_type n = traindata[0].X.size();
    for (size_type i = 0; i < m; i++) {
        // Euclidean distance is used as the distance metric.
        double distance = 0;
        for (vector<double>::size_type j = 0; j < n; j++) {
            const double diff = newx.X[j] - traindata[i].X[j];
            distance += diff * diff;
        }
        traindata[i].dis = sqrt(distance);
    }

    sort(traindata.begin(), traindata.end(), cmp);

    // Never copy more neighbours than there are training samples.
    if (nearestsample.size() > m) {
        nearestsample.resize(m);
    }
    const size_type k = nearestsample.size();
    for (size_type i = 0; i < k; i++) {
        nearestsample[i] = traindata[i];
    }
}

// Returns the index of the largest of the first `n` elements of `a`.
// On ties the lowest index wins. Returns 0 when n <= 1.
int max(int *a, int n) {
    int maxindex = 0;
    for (int i = 1; i < n; i++) {
        if (a[i] > a[maxindex]) {
            maxindex = i;
        }
    }
    return maxindex;
}

// Appends every complete row found in the file at `path` to `out`.
//
// A row is `fea` numeric feature values followed by one numeric label; the
// label is read as a double and truncated to int. Reading stops at end of
// file or at the first token that is not a number; an incomplete trailing
// row is discarded. Returns false only if the file could not be opened.
static bool loadSamples(const char *path, int fea, vector<sample> &out) {
    ifstream in(path);
    if (!in) {
        return false;
    }

    sample row;
    double value = 0;
    for (;;) {
        row.X.clear();
        int column = 0;
        // Test the extraction itself rather than eof(): eof() only becomes
        // true after a read has already failed, and never for an unopened file.
        while (column < fea + 1 && (in >> value)) {
            if (column < fea) {
                row.X.push_back(value);
            } else {
                row.label = static_cast<int>(value);
            }
            column++;
        }
        if (column < fea + 1) {
            break;
        }
        out.push_back(row);
    }
    return true;
}

int main() {
    vector<sample> traindata, testdata;

    if (!loadSamples("D://machineLearning/traindata.txt", kFeatureCount, traindata)
        || traindata.empty()) {
        cerr << "error: could not read any training samples" << endl;
        getchar();
        return 1;
    }
    if (!loadSamples("D://machineLearning/testdata.txt", kFeatureCount, testdata)) {
        cerr << "error: could not open the test data file" << endl;
        getchar();
        return 1;
    }

    const vector<sample>::size_type N = testdata.size();
    vector<sample> nearestsample(kNeighbourCount);
    int label[kClassCount] = { 0 };
    // Maps a vote bucket index to the label reported for it.
    const int resultlabel[kClassCount] = { 0, 1, 2 };

    for (vector<sample>::size_type i = 0; i < N; i++) {
        knn(testdata[i], traindata, nearestsample);

        // Classification decision: simple majority vote among the neighbours.
        fill(label, label + kClassCount, 0);
        for (vector<sample>::size_type j = 0; j < nearestsample.size(); j++) {
            const int neighbourLabel = nearestsample[j].label;
            if (neighbourLabel == 0) {
                label[0]++;
            } else if (neighbourLabel == 1) {
                label[1]++;
            } else {
                label[2]++;
            }
        }
        testdata[i].result = resultlabel[max(label, kClassCount)];
    }

    for (vector<sample>::size_type i = 0; i < N; i++) {
        for (int j = 0; j < kFeatureCount; j++) {
            cout << testdata[i].X[j] << " ";
        }
        cout << testdata[i].label << " ";
        cout << testdata[i].result << " ";
        cout << endl;
    }

    getchar();  // wait for a key press before the program exits
    return 0;
}

优点:实现简单,对异常值不敏感
缺点:计算复杂度高(预测时需要与全部训练样本逐一计算距离),空间复杂度高(需要保存全部训练数据)

参考:
http://blog.csdn.net/mimi9919/article/details/51172095

原创粉丝点击