决策树ID3算法

来源:互联网 发布:西门子840d宏程序编程 编辑:程序博客网 时间:2024/06/15 04:59

一、实验内容
已知:流感训练数据集,预定义两个类别;
求:用ID3算法建立流感的属性描述决策树
流感训练数据集
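(原图缺失,下表根据代码中 InArr 的数据还原)
编号  头痛  肌肉酸痛  体温  流感
1     是    是        正常  否
2     是    是        高    是
3     是    是        很高  是
4     否    是        正常  否
5     否    否        高    否
6     否    是        很高  是
7     是    否        高    是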

二、实验设计(原理分析及流程)
ID3算法的实现:第一步,分别计算三个属性(头痛、肌肉酸痛、体温)相对于决策属性(是否患流感)的条件熵,选取条件熵最小(即信息增益最大)的属性作为根节点;第二步,按该属性的各个取值进行分支,对类别尚不纯的分支,在剩余属性上重复计算条件熵并选取最小者作为子节点;如此递归,直到每个分支中的样本都属于同一类别,最终得出决策树。

三、代码:

/*
 * ID3 demo on a 7-row flu training table.
 *
 * For each attribute the program computes the conditional entropy of the
 * decision attribute (flu) given that attribute, picks the attribute with
 * the smallest value as the root, and then repeats the computation on the
 * rows whose temperature is "high" to choose the child node.
 *
 * Build with the math library, e.g.:  cc -std=c11 id3.c -lm
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <math.h>

#define arrlen 7 /* number of rows in the training table */

/* One row of the training table. */
typedef struct Info
{
    int num;         /* sample id (1..7 in the table defined in main) */
    bool headache;   /* 1 means having */
    bool courbature; /* 1 means having */
    int temperature; /* 0: normal, 1: high, 2: very high */
    bool flu;        /* 1 means having */
} info;

/*
 * Entropy (base 2) of a two-class split with `neg` and `pos` members.
 * Returns 0 when either class is empty, which also covers the case where
 * both are empty and therefore avoids 0/0 and log2(0).
 */
static double BinaryEntropy(double neg, double pos)
{
    double total;

    if (neg == 0.0 || pos == 0.0)
    {
        return 0.0;
    }
    total = neg + pos;
    return -(neg / total) * log2(neg / total)
           + -(pos / total) * log2(pos / total);
}

/*
 * Conditional entropy H(b | a) over the first `rownum` entries.
 *
 * a      - attribute values, each expected to be 0, 1 or 2
 * b      - class labels, each expected to be 0 or 1
 * rownum - number of entries to read from a and b
 *
 * Entries whose attribute or label falls outside those ranges are not
 * counted, but `rownum` is still used as the weight denominator.
 * Prints the per-value entropies and the weighted result to stdout.
 * Returns the weighted entropy, or 0.0 when rownum <= 0 (nothing to split,
 * and it avoids dividing by zero).
 */
double Entropy012(const int *a, const int *b, int rownum)
{
    double count[3][2] = {{0.0}}; /* count[value of a][value of b] */
    double ty[3];                 /* rows per attribute value */
    double h[3];                  /* entropy per attribute value */
    double entropy;
    int i, v;

    if (rownum <= 0)
    {
        return 0.0;
    }

    for (i = 0; i < rownum; i++)
    {
        if (a[i] >= 0 && a[i] <= 2 && (b[i] == 0 || b[i] == 1))
        {
            count[a[i]][b[i]] += 1.0;
        }
    }

    for (v = 0; v < 3; v++)
    {
        ty[v] = count[v][1] + count[v][0];
        h[v] = BinaryEntropy(count[v][0], count[v][1]);
    }

    printf("eo:%f, e1:%f, e2:%f\n", h[0], h[1], h[2]);
    entropy = (ty[0] / rownum) * h[0] + (ty[1] / rownum) * h[1] + (ty[2] / rownum) * h[2];
    printf("ty0: %f, ty1: %f, ty2: %f, entropy:%f\n", ty[0], ty[1], ty[2], entropy);
    return entropy;
}

/*
 * Index (0, 1 or 2) of the smallest of a, b, c.
 * Ties are resolved in favour of the earlier argument, since equally good
 * attributes are all valid choices. Returns -1 only when no
 * argument compares as the minimum (e.g. a NaN is involved).
 */
int MinNode(double a, double b, double c)
{
    if (a <= b && a <= c)
    {
        return 0;
    }
    if (b <= a && b <= c)
    {
        return 1;
    }
    if (c <= a && c <= b)
    {
        return 2;
    }
    return -1;
}

int main(void)
{
    static const char *const attr_name[] = { "headache", "courbature", "temperature" };
    /* Training table: { num, headache, courbature, temperature, flu } */
    const info InArr[arrlen] =
    {
        { 1, 1, 1, 0, 0 },
        { 2, 1, 1, 1, 1 },
        { 3, 1, 1, 2, 1 },
        { 4, 0, 1, 0, 0 },
        { 5, 0, 0, 1, 0 },
        { 6, 0, 1, 2, 1 },
        { 7, 1, 0, 1, 1 }
    };
    /* Column-wise copies of the table, as Entropy012 works on int arrays. */
    int headache[arrlen];
    int courbature[arrlen];
    int temperature[arrlen];
    int flu[arrlen];
    /* Rows with temperature == high; at most arrlen of them, so fixed-size
       arrays are enough and no zero-length VLA can arise. */
    int subhead[arrlen];
    int subcour[arrlen];
    int subflu[arrlen];
    const char *res; /* points at string literals only; nothing to free */
    double a, b, c;
    int i, j, num = 0;

    for (i = 0; i < arrlen; i++)
    {
        headache[i] = InArr[i].headache;
        courbature[i] = InArr[i].courbature;
        temperature[i] = InArr[i].temperature;
        flu[i] = InArr[i].flu;
    }

    /* Root node: attribute with the smallest conditional entropy. */
    a = Entropy012(headache, flu, arrlen);
    b = Entropy012(courbature, flu, arrlen);
    c = Entropy012(temperature, flu, arrlen);
    j = MinNode(a, b, c);
    res = (j >= 0) ? attr_name[j] : "error!";
    /* NOTE: the branch interpretation below and the second level that
       follows are written for this table, where temperature is the root. */
    printf("The top:%s.\n Normal: no flu, very high: flu\n", res); // temperature is very high indicates having flu

    /* Second level: only the "high" temperature branch is still mixed. */
    for (i = 0; i < arrlen; i++)
    {
        if (InArr[i].temperature == 1)
        {
            subhead[num] = InArr[i].headache;
            subcour[num] = InArr[i].courbature;
            subflu[num] = InArr[i].flu;
            num++;
        }
    }
    printf("The number:%d\n", num);

    a = Entropy012(subhead, subflu, num);
    b = Entropy012(subcour, subflu, num);
    if (a <= b)
    {
        res = "headache"; /* a tie keeps the earlier attribute */
    }
    else if (a > b)
    {
        res = "courbature";
    }
    else
    {
        res = "Error!"; /* only reachable when a comparison involves NaN */
    }
    printf("The child of the temperature(high):%s.\n having: flu, not having: no flu\n", res);

    return EXIT_SUCCESS;
}
原创粉丝点击