统计学习方法(二)感知器C语音实现

来源:互联网 发布:java中嵌套循环 编辑:程序博客网 时间:2024/06/06 02:06

感知器(perception)是二分类线性分类模型,其输入为实例的特征向量,输出为实例的类别,取+1和-1的二值,感知机对应于输入空间(特征空间)中将实例划分为正负两类的分离超平面,属于判别模型,感知机学习旨在求出将训练数据进行线性划分的分离超平面,为此导入基于误分类的损失函数,利用梯度下降法对于损失函数进行极小化,求得感知器模型。——–摘自统计学习方法
感知器包含输入层和输出层,主要对线性的数据能有较好的区分效果。
感知器算法总的来说当实例点被误分类,即位于分离的超平面的一侧时,调整w,b的值,使分离超平面向该误分类的一侧移动,以减少该分类点与超平面的距离,直至超平面超过该误分类点使其被正确分类;
这里写图片描述
感知器分为原始形式和对偶形式,两种形式对感知器求解方法不同。具体对偶形式的感知器可以参考知乎:
https://www.zhihu.com/question/26526858
感知器学习算法原始形式:
输入:训练数据集T={(x1,y1),(x2,y2),,(xn,xn)},其中xiχ=Rn,yiγ={1+1},i=1,2,,N;学习率η0<η<1;
输出: w,b;感知器模型f(x)=sign(wx+b)
(1) 选取初值w0b0
(2)在训练集中选取数据xiyi
(3)如果yi(wxi+b)0

ww+ηyixi
bb+ηyi

(4)转至(2),直到训练集中没有误分类的点
算法实现:
以例2.1为例:
这里写图片描述
实现代码:

#include<stdio.h>#include<stdlib.h>#include<math.h>#include<iostream>using namespace std;int w[2] = {0};int b = 0;int step = 1;class point{public:    point();    void givepoint(int inx,int iny,int intrue);    ~point();    int x;    int y;    int mytrue;private:};point::point(){}void point::givepoint(int inx,int iny,int intrue){    x = inx;    y = iny;    mytrue = intrue;}point::~point(){}void minL(point *inpoint,int pointnum){    int tempW[2];    int tempB=0;    int tempY;    int truecount = 0;    printf("初始w:(%d,%d),b:%d\n", w[0], w[1], b);    while (1)    {        tempW[0] = w[0];        tempW[1] = w[1];        tempB = b;        for (int i = 0; i < pointnum; i++)        {            tempY = tempW[0] * inpoint[i].x + tempW[1] * inpoint[i].y + tempB;            if ((tempY>0 && inpoint[i].mytrue == 1) || (tempY <=0 && inpoint[i].mytrue == -1))            {                truecount++;                continue;            }            else            {                if (tempY > 0)                {                    w[0] = w[0] - step*inpoint[i].x;                    w[1] = w[1] - step*inpoint[i].y;                    b = b - step * 1;                }                else if (tempY <= 0)                {                    w[0] = w[0] + step*inpoint[i].x;                    w[1] = w[1] + step*inpoint[i].y;                    b = b + step * 1;                }                printf("更新w:(%d,%d),b:%d\n", w[0], w[1], b);                break;            }        }        if (truecount == 3)        {            break;        }        else        {            truecount = 0;        }    }}int main(){    int pointnum;    printf("请输入点的个数:\n");    scanf("%d", &pointnum);    point *inpoint=new point[pointnum];    printf("请输入样本点的坐标及样本种类,用正负1表示\n");    for (int i=0; i < pointnum; i++)    {        int tempx, tempy, temptrue;        scanf("%d%d%d", &tempx, &tempy, &temptrue);        inpoint[i].givepoint(tempx, tempy, temptrue);    }    getchar();    printf("输入完毕开始分类\n");    minL(inpoint, pointnum);    printf("最终w:(%d,%d),b:%d\n", w[0], w[1], b);    getchar();}

实现效果:
这里写图片描述
感知器学习算法对偶形式:
输入:训练数据集T={(x1,y1),(x2,y2),,(xn,xn)},其中xiχ=Rn,yiγ={1+1},i=1,2,,N;学习率η0<η<1;
输出:a,b;感知机模型f(x)=sign(Nj=1ajyjxjx+b)
其中α=α1,α2,,αN)T
(1)α0,b0
(2)在训练集中选取数据(xi,yi)
(3)如果yi{Nj=1αjyjxjxi+b}0

αiαi+η
bb+ηyi

(4)转至(2)直到没有误分类数据
对偶形式中训练实例仅以内积的形式出现,为了方便可以先将训练实例间的内积计算出来并以矩阵的形式储存,用Gram矩阵(还记得现控课上被Gram矩阵笼罩的阴影吗?没错这边又回来了ˋ_ˊ* )
G=[xixj]N×N

同样用上面的例子实现代码:

#include<stdio.h>#include<stdlib.h>#include<math.h>#include<iostream>using namespace std;int w[2] = { 0 };int b = 0;int step = 1;int Y[3] = { 0 };class point{public:    point();    void givepoint(int inx, int iny, int intrue);    ~point();    int x;    int y;    int mytrue;private:};point::point(){}void point::givepoint(int inx, int iny, int intrue){    x = inx;    y = iny;    mytrue = intrue;}point::~point(){}void Duality_minL(point *inpoint, int pointnum,int *alpha,int **gram){    int tempW[2];    int tempB=0;    int tempY=0;    int truecount=0;    printf("初始a:(%d,%d,%d),b:%d\n", alpha[0], alpha[1], alpha[2], b);    while (1)    {    //  tempW[0] = w[0];    //  tempW[1] = w[1];        tempB = b;        for (int i = 0; i < pointnum; i++)        {            tempY = 0;            for (int j = 0; j <pointnum; j++)            {                tempY = tempY + alpha[j] * Y[j] * gram[j][i];            }            tempY = tempY + b;            //Y[i] = tempY;            if ((tempY>0 && inpoint[i].mytrue == 1) || (tempY <= 0 && inpoint[i].mytrue == -1))            {                truecount++;                continue;            }            else            {                if (tempY > 0)                {                    Y[i] = -1;                    alpha[i] = alpha[i] + 1;                    b = b +Y[i]*step;                }                else if (tempY <= 0)                {                    Y[i] = 1;                    alpha[i] = alpha[i] + 1;                    b = b + Y[i] * step;                }                printf("更新a:(%d,%d,%d),b:%d\n", alpha[0], alpha[1],alpha[2], b);                break;            }        }        if (truecount == 3)        {            break;        }        else        {            truecount = 0;        }    }    for (int i = 0; i < pointnum; i++)    {        w[0] = w[0] + alpha[i] * inpoint[i].x*inpoint[i].mytrue;        w[1] = w[1] + alpha[i] * inpoint[i].y*inpoint[i].mytrue;    }}int main(){    int pointnum;    printf("请输入点的个数:\n");    scanf("%d", &pointnum);    point *inpoint = new point[pointnum];    int *alpha = new int[pointnum];    for (int i = 0; i < pointnum; i++)    {        alpha[i] = 0;    }    printf("请输入样本点的坐标及样本种类,用正负1表示\n");    for (int i = 0; i < pointnum; i++)    {        int tempx, tempy, temptrue;        scanf("%d%d%d", &tempx, &tempy, &temptrue);        inpoint[i].givepoint(tempx, tempy, temptrue);    }    int **gram = new int *[pointnum];    for (int i = 0; i < pointnum; i++)    {        gram[i] = new int[pointnum];    }    for (int i = 0; i < pointnum; i++)    {        for (int j = 0; j < pointnum; j++)        {            gram[i][j] = inpoint[i].x*inpoint[j].x + inpoint[i].y*inpoint[j].y;        }    }    getchar();    printf("输入完毕开始分类\n");    //minL(inpoint, pointnum);    Duality_minL(inpoint, pointnum, alpha, gram);    printf("最终w:(%d,%d),b:%d\n", w[0], w[1], b);    getchar();}

这里写图片描述
这边感知器只能用于对线性的数据集进行二分类,而且因为样本量较少 所以能很快并且无误地将样本的正负集区分开来,当样本量多时可能无法全部无误地区分,这是应该要用到循环次数和损失函数;
暂且先这样,之后再改一下在mnist数据集上进行尝试
这里写图片描述

阅读全文
0 0
原创粉丝点击