《Machine Learning in Action》之一:k-近邻算法,C#实现手写识别

来源:互联网 发布:安卓远程桌面软件 编辑:程序博客网 时间:2024/05/17 06:38
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Args:
        inX: input vector to classify; it is tiled to one copy per row of
            ``dataSet``, so it must have the same length as a row.
        dataSet: training samples as a 2-D numpy array, one sample per row
            (``dataSet.shape[0]`` is used as the sample count).
        labels: label vector; ``labels[i]`` is the label of row ``i``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest samples.

    Raises:
        IndexError: if ``k`` exceeds the number of samples, or if ``k`` is 0
            (no votes are cast, so there is no winner to return).
    """
    dataSetSize = dataSet.shape[0]
    # Euclidean distance from inX to every training row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

-------------------------------------------------------------------------------------------------------------------------------------------------------------

.shape[0] 返回行数   .shape[1]  返回列数
.tile(x,(y,z)) 把x扩充y次(行),z次(列)
y**x  求y的x次方
.sum(axis=0)  列求和   .sum(axis=1)行求和
.argsort() 返回将数组从小到大排序后的索引值
operator.itemgetter(x) 返回一个函数,用于获取对象的第x个值,编号从0开始
sorted(iterable[, cmp[, key[, reverse]]])
作用:Return a new sorted list from the items in iterable.
          第一个参数是一个iterable,返回值是一个对iterable中元素进行排序后的列表(list)。
可选的参数有三个,cmp、key和reverse。
1)cmp指定一个定制的比较函数,这个函数接收两个参数(iterable的元素),如果第一个参数小于第二个参数,返回一个负数;如果第一个参数等于第二个参数,返回零;如果第一个参数大于第二个参数,返回一个正数。默认值为None。
2)key指定一个接收一个参数的函数,这个函数用于从每个元素中提取一个用于比较的关键字。默认值为None。
3)reverse是一个布尔值。如果设置为True,列表元素将被倒序排列。
通常来说,key和reverse比一个等价的cmp函数处理速度要快。这是因为对于每个列表元素,cmp都会被调用多次,而key和reverse只被调用一次。


初次研究机器学习,留点东西以备理解,希望能用到实际开发中去,感觉非常有用。
随便做了一个C#的例子,识别效果还不错,样本才10来个,标签A,B,C 居然k值内100%准确率


using System;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.IO;
using System.Drawing;
using System.Text;
using System.Collections.Generic;

namespace KNN
{
    /// <summary>
    /// Main window of the k-nearest-neighbour handwriting demo. The user draws a
    /// character in <c>pictureBox1</c>; the drawing is sampled into a 32x32 grid of
    /// ASCII '0'/'1' bytes that can be stored as a labelled training sample or
    /// compared against the loaded samples by Euclidean distance.
    /// </summary>
    public partial class Form1 : Form
    {
        [DllImport("user32")]
        private static extern IntPtr LoadCursorFromFile(string fileName);

        // The drawing is sampled every SampleStep pixels into a GridSize x GridSize grid.
        private const int GridSize = 32;
        private const int SampleLength = GridSize * GridSize;
        private const int SampleStep = 10;
        private const float StrokeWidth = 10;

        // Cells are stored as ASCII digits so the sample files are readable as text.
        private const byte BlankCell = 0x30; // '0'
        private const byte InkCell = 0x31;   // '1'

        private const string CursorFileName = "cursor.dat";
        private const string SampleDirectory = "dataSet";      // one 1x1024 row per file
        private const string SampleViewDirectory = "dataSetV"; // same data as a 32x32 matrix
        private const string ValidLabels = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

        private Point startPoint, endPoint;
        private bool isDrawing = false;
        private byte[] dataSet = new byte[SampleLength];
        private Bitmap bitmapResult;
        private List<DataSetFile> listDataSetFile = new List<DataSetFile>();
        private int k = 5;

        // The pen cursor is loaded once and reused; creating it on every mouse
        // move would re-read the file and allocate a new Cursor each time.
        private Cursor penCursor;
        private bool penCursorLoaded;

        public Form1()
        {
            InitializeComponent();
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
        }

        /// <summary>Pins the window to 800x600 whenever it is resized.</summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            this.Width = 800;
            this.Height = 600;
        }

        /// <summary>Starts a stroke at the pressed position.</summary>
        private void pictureBox1_MouseDown(object sender, MouseEventArgs e)
        {
            startPoint = new Point(e.X, e.Y);
            endPoint = new Point(e.X, e.Y);
            isDrawing = true;
        }

        /// <summary>Ends the current stroke.</summary>
        private void pictureBox1_MouseUp(object sender, MouseEventArgs e)
        {
            isDrawing = false;
        }

        /// <summary>
        /// Shows the pen cursor and, while the left button is held during a stroke,
        /// draws a line segment from the previous position to the current one.
        /// </summary>
        private void pictureBox1_MouseMove(object sender, MouseEventArgs e)
        {
            Cursor cursor = GetPenCursor();
            if (cursor != null)
            {
                Cursor.Current = cursor;
            }

            if (e.Button != MouseButtons.Left || !isDrawing)
            {
                return;
            }

            Point currentPoint = new Point(e.X, e.Y);
            // Graphics and Pen wrap GDI+ handles; release them as soon as the segment is drawn.
            using (Graphics graphics = Graphics.FromImage(bitmapResult))
            using (Pen stroke = new Pen(Color.Black, StrokeWidth))
            {
                graphics.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.AntiAlias;
                graphics.DrawLine(stroke, endPoint, currentPoint);
            }

            endPoint = currentPoint;
            this.pictureBox1.Image = bitmapResult;
        }

        /// <summary>
        /// Returns the cached pen cursor, extracting the embedded cursor resource to
        /// <c>cursor.dat</c> and loading it on first use. Returns null when the
        /// cursor file could not be loaded, so callers keep the default cursor.
        /// </summary>
        private Cursor GetPenCursor()
        {
            if (!penCursorLoaded)
            {
                penCursorLoaded = true;
                if (!File.Exists(CursorFileName))
                {
                    File.WriteAllBytes(CursorFileName, Properties.Resources.pen);
                }

                IntPtr handle = LoadCursorFromFile(CursorFileName);
                // new Cursor(IntPtr.Zero) throws, so only wrap a valid handle.
                if (handle != IntPtr.Zero)
                {
                    penCursor = new Cursor(handle);
                }
            }

            return penCursor;
        }

        /// <summary>Clears the drawing area.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Bitmap previous = bitmapResult;
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
            this.pictureBox1.Image = bitmapResult;

            // Dispose only after the picture box has stopped referencing the old bitmap.
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Converts the drawing to a sample and stores it under the single-character
        /// label typed in <c>textBox1</c>: once as a 1x1024 row in <c>dataSet</c> and
        /// once as a 32-line matrix in <c>dataSetV</c>. Does nothing when there is no
        /// drawing or the label is not exactly one ASCII letter or digit.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string label = textBox1.Text.Trim();
            if (this.pictureBox1.Image == null || label.Length != 1 || !ValidLabels.Contains(label))
            {
                return;
            }

            CaptureDrawing();

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(SampleDirectory);
            Directory.CreateDirectory(SampleViewDirectory);

            // The leading label character is what the loader later reads back as the class.
            string fileName = label + "_" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".txt";
            File.WriteAllBytes(Path.Combine(SampleDirectory, fileName), dataSet);

            string cells = Encoding.ASCII.GetString(dataSet, 0, SampleLength);
            using (StreamWriter writer = new StreamWriter(Path.Combine(SampleViewDirectory, fileName)))
            {
                for (int row = 0; row < GridSize; row++)
                {
                    writer.WriteLine(cells.Substring(row * GridSize, GridSize));
                }
            }
        }

        /// <summary>
        /// Recognises the drawing: computes the Euclidean distance to every loaded
        /// sample, sorts the samples by distance and appends the labels of the k
        /// nearest ones to <c>label3</c>. k is taken from <c>textBox2</c> when it
        /// holds a positive integer; otherwise the previous k is kept.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            // Parse into a temporary: TryParse zeroes its out argument on failure,
            // which would otherwise silently reset k to 0.
            int requestedK;
            if (int.TryParse(textBox2.Text.Trim(), out requestedK) && requestedK > 0)
            {
                k = requestedK;
            }

            label3.Text = "识别结果:";
            if (this.pictureBox1.Image == null || listDataSetFile.Count == 0)
            {
                return;
            }

            CaptureDrawing();

            foreach (DataSetFile sample in listDataSetFile)
            {
                double sumOfSquares = 0;
                for (int j = 0; j < SampleLength; j++)
                {
                    double diff = (double)dataSet[j] - (double)sample.Content[j];
                    sumOfSquares += diff * diff;
                }
                sample.Value = Math.Sqrt(sumOfSquares);
            }

            listDataSetFile.Sort(CompareByValue);

            // There may be fewer samples than k; never index past the end of the list.
            int shown = Math.Min(k, listDataSetFile.Count);
            StringBuilder result = new StringBuilder(label3.Text);
            for (int i = 0; i < shown; i++)
            {
                result.Append(listDataSetFile[i].Label);
            }
            label3.Text = result.ToString();
        }

        /// <summary>
        /// Loads every <c>*.txt</c> sample from the <c>dataSet</c> directory, replacing
        /// any previously loaded samples. The label is the first character of the file
        /// name. A missing directory is treated as "no samples".
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            // Start fresh so clicking the button twice does not duplicate every sample.
            listDataSetFile.Clear();

            DirectoryInfo directory = new DirectoryInfo(SampleDirectory);
            if (directory.Exists)
            {
                foreach (FileInfo file in directory.GetFiles("*.txt"))
                {
                    // ReadAllBytes opens and closes the file itself, so no handle stays open.
                    byte[] raw = File.ReadAllBytes(file.FullName);

                    // Always hand out exactly SampleLength bytes: longer files are
                    // truncated and shorter ones are zero-padded.
                    byte[] content = new byte[SampleLength];
                    Array.Copy(raw, content, Math.Min(raw.Length, SampleLength));

                    DataSetFile sample = new DataSetFile();
                    sample.Path = file.FullName;
                    sample.Label = file.Name.Substring(0, 1);
                    sample.Content = content;
                    listDataSetFile.Add(sample);
                }
            }

            label4.Text = "加载样本完毕";
        }

        /// <summary>
        /// Samples the current drawing into <c>dataSet</c>: one cell every
        /// <c>SampleStep</c> pixels, row by row. A cell is '0' when the sampled pixel
        /// was never drawn on (ARGB value 0) or lies outside the image, '1' otherwise.
        /// </summary>
        private void CaptureDrawing()
        {
            using (Bitmap snapshot = new Bitmap(this.pictureBox1.Image))
            {
                for (int row = 0; row < GridSize; row++)
                {
                    int y = row * SampleStep;
                    for (int col = 0; col < GridSize; col++)
                    {
                        int x = col * SampleStep;
                        // The bounds check keeps the grid at exactly 32x32 even if the
                        // image is not 320x320, so the buffer can never be overrun.
                        bool blank = x >= snapshot.Width
                            || y >= snapshot.Height
                            || snapshot.GetPixel(x, y).ToArgb() == 0;
                        dataSet[row * GridSize + col] = blank ? BlankCell : InkCell;
                    }
                }
            }
        }

        /// <summary>Orders samples by ascending distance.</summary>
        private int CompareByValue(DataSetFile x, DataSetFile y)
        {
            return x.Value.CompareTo(y.Value);
        }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace KNN
{
    /// <summary>
    /// One training sample loaded from disk, together with the distance most
    /// recently computed for it.
    /// </summary>
    class DataSetFile
    {
        /// <summary>Numeric score attached to the sample; Form1 stores the distance to the current drawing here.</summary>
        public double Value { get; set; }

        /// <summary>Location of the sample file; Form1 stores the file's full name here.</summary>
        public string Path { get; set; }

        /// <summary>Class label of the sample; Form1 uses the first character of the file name.</summary>
        public string Label { get; set; }

        /// <summary>Raw bytes of the sample as read from the file.</summary>
        public byte[] Content { get; set; }
    }
}


1 0