A C++ version of KNN


I looked through a few versions, and this one has the clearest structure: it reads iris.data, takes the 100 records whose row number is not a multiple of 3 as the training set and the remaining 50 as the test set, and classifies each test record by a majority vote among its K = 7 nearest neighbors.

// knn.cpp : entry point of a console application that classifies iris.data with KNN.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iostream>
#include <fstream>
using namespace std;

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
#define ATTR_NUM                   4       // number of attributes
#define MAX_SIZE_OF_TRAINING_SET   1000    // maximum size of the training set
#define MAX_SIZE_OF_TEST_SET       100     // maximum size of the test set
#define MAX_VALUE                  10000.0 // upper bound used to initialize distances
#define K                          7       // number of nearest neighbors

// ---------------------------------------------------------------------------
// Data structures
// ---------------------------------------------------------------------------
struct dataVector
{
    int    ID;                      // record number (rowNo)
    char   classLabel[20];          // class label ("Iris-versicolor" needs 16 bytes)
    double attributes[ATTR_NUM];    // numeric attributes
};

struct distanceStruct
{
    int    ID;                      // record number of the neighbor
    double distance;                // distance to the sample being classified
    char   classLabel[20];          // class label of the neighbor
};

// ---------------------------------------------------------------------------
// Global variables
// ---------------------------------------------------------------------------
struct dataVector     gTrainingSet[MAX_SIZE_OF_TRAINING_SET]; // training set
struct dataVector     gTestSet[MAX_SIZE_OF_TEST_SET];         // test set
struct distanceStruct gNearestDistance[K];                    // the K nearest neighbors
int curTrainingSetSize = 0;                                   // current training set size
int curTestSetSize     = 0;                                   // current test set size

// ---------------------------------------------------------------------------
// Euclidean distance between vector1=(x1,...,xn) and vector2=(y1,...,yn)
// ---------------------------------------------------------------------------
double Distance(const struct dataVector &vector1, const struct dataVector &vector2)
{
    double sum = 0.0;
    for (int i = 0; i < ATTR_NUM; i++)
    {
        double diff = vector1.attributes[i] - vector2.attributes[i];
        sum += diff * diff;
    }
    return sqrt(sum);
}

// ---------------------------------------------------------------------------
// Return the index of the largest distance in gNearestDistance
// ---------------------------------------------------------------------------
int GetMaxDistance()
{
    int maxNo = 0;
    for (int i = 1; i < K; i++)
    {
        if (gNearestDistance[i].distance > gNearestDistance[maxNo].distance)
            maxNo = i;
    }
    return maxNo;
}

// ---------------------------------------------------------------------------
// Classify the unknown sample Sample
// ---------------------------------------------------------------------------
char* Classify(const struct dataVector &Sample)
{
    double dist;
    int    maxid, freq[K], i, tmpfreq = 0;
    char  *curClassLabel = gNearestDistance[0].classLabel;
    memset(freq, 0, sizeof(freq));

    // step 1 --- initialize the K nearest distances to the maximum value
    for (i = 0; i < K; i++)
    {
        gNearestDistance[i].distance = MAX_VALUE;
    }

    // step 2 --- find the K nearest neighbors
    for (i = 0; i < curTrainingSetSize; i++)
    {
        // step 2.1 --- distance between the unknown sample and this training sample
        dist = Distance(gTrainingSet[i], Sample);
        // step 2.2 --- index of the current largest distance in gNearestDistance
        maxid = GetMaxDistance();
        // step 2.3 --- if closer than the current farthest neighbor, replace it
        if (dist < gNearestDistance[maxid].distance)
        {
            gNearestDistance[maxid].ID = gTrainingSet[i].ID;
            gNearestDistance[maxid].distance = dist;
            strcpy(gNearestDistance[maxid].classLabel, gTrainingSet[i].classLabel);
        }
    }

    // steps 3 and 4 could be merged into a single pass
    // step 3 --- count how many of the K neighbors carry each label
    for (i = 0; i < K; i++)
    {
        for (int j = 0; j < K; j++)
        {
            if (strcmp(gNearestDistance[i].classLabel, gNearestDistance[j].classLabel) == 0)
            {
                freq[i] += 1;
            }
        }
    }

    // step 4 --- pick the label with the highest count
    for (i = 0; i < K; i++)
    {
        if (freq[i] > tmpfreq)
        {
            tmpfreq = freq[i];
            curClassLabel = gNearestDistance[i].classLabel;
        }
    }
    return curClassLabel;
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
int main()
{
    char  c;
    char *classLabel = NULL;
    int   i, j, rowNo = 0, TruePositive = 0, FalsePositive = 0;
    struct dataVector record;
    ifstream filein("iris.data");
    FILE *fp;

    if (filein.fail())
    {
        cout << "Can't open iris.data" << endl;
        return 1;
    }

    // step 1 --- read the file; stop as soon as the first attribute can no longer be read
    while (filein >> record.attributes[0])
    {
        filein >> c;                         // skip the ',' after the first attribute
        for (i = 1; i < ATTR_NUM; i++)
        {
            filein >> record.attributes[i];
            filein >> c;                     // skip the ',' after each attribute
        }
        filein >> record.classLabel;

        rowNo++;                             // the first record gets rowNo = 1
        record.ID = rowNo;

        // the 100 records with rowNo % 3 != 0 form the training set
        if (rowNo % 3 != 0)
        {
            if (curTrainingSetSize >= MAX_SIZE_OF_TRAINING_SET)
            {
                cout << "The training set already holds " << MAX_SIZE_OF_TRAINING_SET << " examples!" << endl << endl;
                break;
            }
            gTrainingSet[curTrainingSetSize++] = record;
        }
        // the remaining 50 records with rowNo % 3 == 0 form the test set
        else
        {
            if (curTestSetSize >= MAX_SIZE_OF_TEST_SET)
            {
                cout << "The test set already holds " << MAX_SIZE_OF_TEST_SET << " examples!" << endl << endl;
                break;
            }
            gTestSet[curTestSetSize++] = record;
        }
    }
    filein.close();

    // step 2 --- classify with KNN and write the results to iris_OutPut.txt
    fp = fopen("iris_OutPut.txt", "w+t");
    fprintf(fp, "************************************ Program notes ***********************************\n");
    fprintf(fp, "** KNN classification of iris.data. Each record is numbered by rowNo, starting at 1.\n");
    fprintf(fp, "** Of the 150 records, the 100 with rowNo %% 3 != 0 are the training set; the other 50 are the test set.\n");
    fprintf(fp, "***************************************************************************************\n\n");
    fprintf(fp, "************************************ Results ******************************************\n\n");

    for (i = 0; i < curTestSetSize; i++)
    {
        fprintf(fp, "************************************ Test record %d ***********************************\n", i + 1);
        classLabel = Classify(gTestSet[i]);
        if (strcmp(classLabel, gTestSet[i].classLabel) == 0)    // labels match: classified correctly
        {
            TruePositive++;
        }
        cout << "rowNo: " << gTestSet[i].ID << "    \t";
        cout << "KNN result: " << classLabel << " (true label: " << gTestSet[i].classLabel << ")\n";
        fprintf(fp, "rowNo:  %3d   \t  KNN result:  %s ( true label:  %s )\n", gTestSet[i].ID, classLabel, gTestSet[i].classLabel);
        if (strcmp(classLabel, gTestSet[i].classLabel) != 0)    // labels differ: misclassified
        {
            fprintf(fp, "*** misclassified ***\n");
        }
        fprintf(fp, "%d nearest neighbors:\n", K);
        for (j = 0; j < K; j++)
        {
            fprintf(fp, "rowNo:  %3d   \t   Distance:  %f   \tClassLabel:    %s\n",
                    gNearestDistance[j].ID, gNearestDistance[j].distance, gNearestDistance[j].classLabel);
        }
        fprintf(fp, "\n");
    }

    FalsePositive = curTestSetSize - TruePositive;
    fprintf(fp, "*********************************** Summary ******************************************\n");
    fprintf(fp, "TP (true positives): %d\nFP (false positives): %d\naccuracy: %f\n",
            TruePositive, FalsePositive, double(TruePositive) / curTestSetSize);
    fclose(fp);
    system("pause");
    return 0;
}
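For comparison, here is a minimal self-contained sketch (my own, not part of the original post) of the same neighbor-selection and majority-vote steps written with the standard library instead of the fixed-size gNearestDistance array. The Record fields and the Euclidean distance mirror the program above; the tiny training set in main() is purely illustrative.

// knn_sketch.cpp -- the core of the KNN classifier above, restated with the
// standard library: keep the K smallest distances, then take a majority vote.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Record {
    std::vector<double> attributes;   // the numeric attributes of one sample
    std::string classLabel;           // its class label
};

// Euclidean distance, as in the Distance() function above.
double Distance(const Record &a, const Record &b) {
    double sum = 0.0;
    for (size_t i = 0; i < a.attributes.size(); i++) {
        double d = a.attributes[i] - b.attributes[i];
        sum += d * d;
    }
    return std::sqrt(sum);
}

// Classify `sample` by a majority vote among its K nearest training records.
std::string Classify(const std::vector<Record> &training, const Record &sample, size_t K) {
    // Pair every training record's distance to the sample with its index.
    std::vector<std::pair<double, size_t> > dist;
    for (size_t i = 0; i < training.size(); i++)
        dist.push_back(std::make_pair(Distance(training[i], sample), i));

    // Move the K smallest distances to the front; no full sort is needed.
    size_t k = K < dist.size() ? K : dist.size();
    std::partial_sort(dist.begin(), dist.begin() + k, dist.end());

    // Count the labels of the k nearest records and return the most frequent one.
    std::map<std::string, int> votes;
    for (size_t i = 0; i < k; i++)
        votes[training[dist[i].second].classLabel]++;
    std::string best;
    int bestCount = 0;
    for (std::map<std::string, int>::const_iterator it = votes.begin(); it != votes.end(); ++it) {
        if (it->second > bestCount) {
            bestCount = it->second;
            best = it->first;
        }
    }
    return best;
}

int main() {
    // A tiny illustrative training set (values made up, not taken from iris.data).
    Record a; a.attributes = std::vector<double>(4, 1.0); a.classLabel = "Iris-setosa";
    Record b; b.attributes = std::vector<double>(4, 5.0); b.classLabel = "Iris-virginica";
    std::vector<Record> training;
    training.push_back(a);
    training.push_back(b);

    Record sample; sample.attributes = std::vector<double>(4, 1.2);
    std::cout << Classify(training, sample, 1) << std::endl;   // prints Iris-setosa
    return 0;
}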

Below is the data file (iris.data); the program expects one record per line:

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
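If the file is saved in exactly this one-record-per-line form, the "value >> comma" extraction in main() consumes it cleanly. Here is a small stand-alone sketch (illustrative, not from the original post) of that parsing applied to the first record:

// parse_one.cpp -- illustrates the "value >> comma" parsing used in main():
// four comma-separated doubles followed by a class label, one record per line.
#include <iostream>
#include <sstream>
#include <string>

int main() {
    std::istringstream line("5.1,3.5,1.4,0.2,Iris-setosa"); // first record above
    double attributes[4];
    char comma;
    std::string classLabel;
    for (int i = 0; i < 4; i++)
        line >> attributes[i] >> comma;   // read a number, then skip the ','
    line >> classLabel;                   // the remainder is the class label
    std::cout << attributes[0] << " ... " << classLabel << std::endl; // prints: 5.1 ... Iris-setosa
    return 0;
}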