<Machine Learning in Action >之一 k-近邻算法 C#实现手写识别
来源:互联网 发布:安卓远程桌面软件 编辑:程序博客网 时间:2024/05/17 06:38
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Requires ``tile`` (from numpy) and the ``operator`` module to be in scope.

    Parameters
    ----------
    inX : input vector to classify
    dataSet : training sample set; each row is compared against ``inX``
    labels : label vector, indexed by the row index of ``dataSet``
    k : number of nearest neighbours that vote

    Returns
    -------
    The label that collected the most votes among the ``k`` nearest rows.
    """
    dataSetSize = dataSet.shape[0]

    # Euclidean distance from inX to every row of dataSet.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5

    # Row indices ordered from the nearest to the farthest sample.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k nearest rows.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() works on both Python 2 and Python 3; iteritems() exists only on Python 2.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
-------------------------------------------------------------------------------------------------------------------------------------------------------------
.shape[0] 返回行数 .shape[1] 返回列数
.tile(x,(y,z)) 把x扩充y次(行),z次(列)
y**x 求y的x次方
.sum(axis=0) 列求和 .sum(axis=1)行求和
.argsort() 返回数组元素从小到大排序后的索引值
.itemgetter(x) 获取第x个值,编号从0开始
sorted(iterable[, cmp[, key[, reverse]]])
作用:Return a new sorted list from the items in iterable.
第一个参数是一个iterable,返回值是一个对iterable中元素进行排序后的列表(list)。
可选的参数有三个,cmp、key和reverse。
1)cmp指定一个定制的比较函数,这个函数接收两个参数(iterable的元素),如果第一个参数小于第二个参数,返回一个负数;如果第一个参数等于第二个参数,返回零;如果第一个参数大于第二个参数,返回一个正数。默认值为None。
2)key指定一个接收一个参数的函数,这个函数用于从每个元素中提取一个用于比较的关键字。默认值为None。
3)reverse是一个布尔值。如果设置为True,列表元素将被倒序排列。
通常来说,key和reverse比一个等价的cmp函数处理速度要快。这是因为对于每个列表元素,cmp都会被调用多次,而key和reverse只被调用一次。
初次研究机器学习,留点东西以备理解,希望能用到实际开发中去,感觉非常有用。
随便做了一个C#的例子,识别效果还不错,样本才10来个,标签A,B,C 居然k值内100%准确率
using System;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.IO;
using System.Drawing;
using System.Text;
using System.Collections.Generic;

namespace KNN
{
    /// <summary>
    /// Main form of the handwriting demo. The user draws on <c>pictureBox1</c>;
    /// the drawing is sampled into a 32x32 grid of ASCII '0'/'1' bytes that can be
    /// saved as a labelled sample or compared (Euclidean distance) against the
    /// loaded samples to list the labels of the k nearest ones.
    /// </summary>
    public partial class Form1 : Form
    {
        [DllImport("user32")]
        private static extern IntPtr LoadCursorFromFile(string fileName);

        // Side length of the sampling grid and the resulting sample size (32 * 32 = 1024).
        private const int GridSize = 32;
        private const int SampleLength = GridSize * GridSize;

        // Samples are stored as ASCII digits: '0' for an untouched cell, '1' for a drawn one.
        private const byte BlankCell = 0x30;
        private const byte InkCell = 0x31;

        private const string CursorFileName = "cursor.dat";
        private const string FlatSampleDirectory = "dataSet";   // 1*1024 layout
        private const string GridSampleDirectory = "dataSetV";  // 32*32 layout
        private const string LabelAlphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

        private Point startPoint, endPoint;
        private bool isDrawing = false;
        private byte[] dataSet = new byte[SampleLength];
        private Bitmap bitmapResult;
        private List<DataSetFile> listDataSetFile = new List<DataSetFile>();
        private int k = 5;

        // The pen cursor is loaded once and reused instead of being recreated on every mouse move.
        private Cursor penCursor;
        private bool penCursorLoadAttempted;

        public Form1()
        {
            InitializeComponent();
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
        }

        /// <summary>Forces the window back to 800x600 whenever it is resized.</summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            this.Width = 800;
            this.Height = 600;
        }

        /// <summary>Starts a stroke at the pressed position.</summary>
        private void pictureBox1_MouseDown(object sender, MouseEventArgs e)
        {
            startPoint = new Point(e.X, e.Y);
            endPoint = new Point(e.X, e.Y);
            isDrawing = true;
        }

        /// <summary>Ends the current stroke.</summary>
        private void pictureBox1_MouseUp(object sender, MouseEventArgs e)
        {
            isDrawing = false;
        }

        /// <summary>
        /// Shows the pen cursor and, while the left button is held during a stroke,
        /// extends the stroke from the previous point to the current one.
        /// </summary>
        private void pictureBox1_MouseMove(object sender, MouseEventArgs e)
        {
            Cursor pen = GetPenCursor();
            if (pen != null)
            {
                Cursor.Current = pen;
            }

            if (e.Button != MouseButtons.Left || !isDrawing)
            {
                return;
            }

            Point currentPoint = new Point(e.X, e.Y);

            // Graphics and Pen wrap GDI+ handles; dispose them after each segment.
            using (Graphics graphics = Graphics.FromImage(bitmapResult))
            using (Pen strokePen = new Pen(Color.Black, 10))
            {
                graphics.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.AntiAlias;
                graphics.DrawLine(strokePen, endPoint, currentPoint);
            }

            endPoint = currentPoint;
            this.pictureBox1.Image = bitmapResult;
        }

        /// <summary>Clears the drawing area by swapping in a fresh bitmap.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Bitmap previous = bitmapResult;
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
            this.pictureBox1.Image = bitmapResult;

            // Dispose only after the picture box no longer references the old bitmap.
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Converts the drawing into a sample and stores it twice: as 1024 raw bytes
        /// under "dataSet" and as 32 text lines of 32 characters under "dataSetV".
        /// Nothing is saved unless textBox1 holds exactly one ASCII letter or digit.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (this.pictureBox1.Image == null)
            {
                return;
            }

            string label = textBox1.Text.Trim();
            if (label.Length != 1 || !LabelAlphabet.Contains(label))
            {
                return;
            }

            SampleImage(this.pictureBox1.Image, dataSet);

            Directory.CreateDirectory(FlatSampleDirectory);
            Directory.CreateDirectory(GridSampleDirectory);

            // Milliseconds keep two samples saved within the same second from overwriting each other.
            string fileName = label + "_" + DateTime.Now.ToString("yyyyMMddHHmmssfff") + ".txt";

            File.WriteAllBytes(Path.Combine(FlatSampleDirectory, fileName), dataSet);

            string digits = Encoding.ASCII.GetString(dataSet, 0, SampleLength);
            using (StreamWriter writer = new StreamWriter(Path.Combine(GridSampleDirectory, fileName)))
            {
                for (int row = 0; row < GridSize; row++)
                {
                    writer.WriteLine(digits.Substring(row * GridSize, GridSize));
                }
            }
        }

        /// <summary>
        /// Runs recognition: samples the drawing, computes its Euclidean distance to
        /// every loaded sample, sorts the samples by distance and appends the labels
        /// of the k nearest ones to label3.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            // Only accept a positive integer; otherwise keep the previous k
            // (int.TryParse writes 0 into its out argument on failure).
            string kText = textBox2.Text.Trim();
            int parsedK;
            if (kText != "" && int.TryParse(kText, out parsedK) && parsedK > 0)
            {
                k = parsedK;
            }

            label3.Text = "识别结果:";

            if (this.pictureBox1.Image == null || listDataSetFile.Count == 0)
            {
                return;
            }

            SampleImage(this.pictureBox1.Image, dataSet);

            foreach (DataSetFile sample in listDataSetFile)
            {
                double sumOfSquares = 0;
                for (int j = 0; j < SampleLength; j++)
                {
                    double difference = Convert.ToDouble(dataSet[j]) - Convert.ToDouble(sample.Content[j]);
                    sumOfSquares += difference * difference;
                }
                sample.Value = Math.Sqrt(sumOfSquares);
            }

            listDataSetFile.Sort(CompareByValue);

            // Never read past the end of the list when k exceeds the number of samples.
            int neighbourCount = Math.Min(k, listDataSetFile.Count);
            StringBuilder result = new StringBuilder(label3.Text);
            for (int n = 0; n < neighbourCount; n++)
            {
                result.Append(listDataSetFile[n].Label);
            }
            label3.Text = result.ToString();
        }

        /// <summary>
        /// Loads every "*.txt" sample from the "dataSet" directory. The first character
        /// of the file name is the label; up to 1024 bytes of content are read.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            DirectoryInfo sampleDirectory = new DirectoryInfo(FlatSampleDirectory);
            if (!sampleDirectory.Exists)
            {
                label4.Text = "未找到样本目录";
                return;
            }

            // Start from an empty list so repeated loads do not duplicate samples.
            listDataSetFile.Clear();

            foreach (FileInfo file in sampleDirectory.GetFiles("*.txt"))
            {
                DataSetFile sample = new DataSetFile();
                sample.Path = file.FullName;
                sample.Label = file.Name.Substring(0, 1);
                sample.Content = ReadSampleBytes(sample.Path);
                listDataSetFile.Add(sample);
            }

            label4.Text = "加载样本完毕";
        }

        /// <summary>Orders samples by ascending distance (<see cref="DataSetFile.Value"/>).</summary>
        private int CompareByValue(DataSetFile x, DataSetFile y)
        {
            int returnVal = x.Value.CompareTo(y.Value);
            return returnVal;
        }

        /// <summary>
        /// Samples a GridSize x GridSize grid of pixels from <paramref name="image"/> into
        /// <paramref name="target"/>: a pixel whose ARGB value is 0 becomes '0', any other '1'.
        /// For a 320x320 image this visits the same pixels as stepping by 10.
        /// </summary>
        private static void SampleImage(Image image, byte[] target)
        {
            using (Bitmap map = new Bitmap(image))
            {
                int index = 0;
                for (int row = 0; row < GridSize; row++)
                {
                    int y = row * map.Height / GridSize;
                    for (int column = 0; column < GridSize; column++)
                    {
                        int x = column * map.Width / GridSize;
                        target[index] = map.GetPixel(x, y).ToArgb() == 0 ? BlankCell : InkCell;
                        index++;
                    }
                }
            }
        }

        /// <summary>
        /// Reads up to SampleLength bytes from <paramref name="path"/>; bytes missing from a
        /// shorter file stay zero. The stream is closed before returning.
        /// </summary>
        private static byte[] ReadSampleBytes(string path)
        {
            byte[] content = new byte[SampleLength];
            using (FileStream stream = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                int total = 0;
                while (total < SampleLength)
                {
                    // Stream.Read may return fewer bytes than requested; loop until done or EOF.
                    int read = stream.Read(content, total, SampleLength - total);
                    if (read == 0)
                    {
                        break;
                    }
                    total += read;
                }
            }
            return content;
        }

        /// <summary>
        /// Returns the pen cursor, extracting the embedded resource to "cursor.dat" and
        /// loading it on first use. Returns null if the cursor could not be loaded.
        /// </summary>
        private Cursor GetPenCursor()
        {
            if (!penCursorLoadAttempted)
            {
                penCursorLoadAttempted = true;

                if (!File.Exists(CursorFileName))
                {
                    byte[] cursorBytes = Properties.Resources.pen;
                    File.WriteAllBytes(CursorFileName, cursorBytes);
                }

                // LoadCursorFromFile returns a null handle on failure; new Cursor(IntPtr.Zero) would throw.
                IntPtr handle = LoadCursorFromFile(CursorFileName);
                if (handle != IntPtr.Zero)
                {
                    penCursor = new Cursor(handle);
                }
            }
            return penCursor;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace KNN
{
    /// <summary>
    /// Plain data holder for one sample: its file path, its label, its raw bytes
    /// and a numeric value attached to it.
    /// </summary>
    class DataSetFile
    {
        /// <summary>Numeric value associated with the sample.</summary>
        public double Value { get; set; }

        /// <summary>Path of the file the sample belongs to.</summary>
        public string Path { get; set; }

        /// <summary>Label of the sample.</summary>
        public string Label { get; set; }

        /// <summary>Raw bytes of the sample.</summary>
        public byte[] Content { get; set; }
    }
}
- <Machine Learning in Action >之一 k-近邻算法 C#实现手写识别
- 《Machine Learning in Action》 读书笔记之一:K近邻分类器算法
- <Machine Learning in Action >之四 二分k-均值算法 C#实现图像分割
- Machine Learning in Action 学习笔记-(2)kNN k近邻算法
- 小白学习Machine Learning in Action-机器学习实战------分类之k近邻算法
- Machine Learning In Action 系列----手写数字识别(一)
- 基于K-近邻算法识别手写数字的实现
- k近邻算法识别手写数字Python实现
- k-近邻算法实现手写数字识别系统
- k-近邻算法实现手写数字识别系统
- 手写识别系统(k-近邻算法)
- k-近邻算法 手写识别系统
- Python徒手实现识别手写数字—图像识别算法(K最近邻)
- Machine Learning in action:k-Nearest Neighbor
- k近邻 - 手写数字识别
- K近邻算法(一) python实现,手写数字识别(from机器学习实战)
- CSDN机器学习笔记十二 k-近邻算法实现手写识别系统
- k近邻算法实现手写数字的识别和图片的分类
- 安卓第八记——GridView的使用
- 创建空白ShapeFile文件
- webview 和android js 交互
- Deployment failure on Tomcat 6.x. Could not copy all resources to....
- Julia: [1 +j] ==[1 + j] ?
- <Machine Learning in Action >之一 k-近邻算法 C#实现手写识别
- python 多变量赋值
- 杭电1005 Number Sequence问题
- ArrayList初步
- DSADADSAD
- Android中JSON数据使用
- C++ Primer学习总结 第15章 面向对象程序设计
- IntelliJ Idea 常用快捷键列表
- leetcode 112 Path Su