机器学习 --KNN的介绍与实现

来源:互联网 发布:淘宝怎么做品牌分销 编辑:程序博客网 时间:2024/06/05 23:10
具体思路为:


1.初始化学习样本,即向量集合
2.指定集合中每个向量所属的分类,例如a,b,c
3.对于新来的向量v,指定邻居数K,找到距离v最近的K个邻居,设为集合s
4.统计s中各向量所属的分类,找出出现次数最多的那个分类(例如K=5,其中3个向量属于a,1个属于b,1个属于c),则v的分类为a。


图例





实现(c#)


1.通过 NuGet 安装 Accord.NET Framework(示例中用到的 KNearestNeighbors 位于 Accord.MachineLearning 包中)



2.实例代码 
  
/// <summary>
/// A feature vector: a thin wrapper around an array of doubles.
/// </summary>
public class Vector
{
    /// <summary>The raw feature values of this vector.</summary>
    public double[] Features { get; set; }

    /// <summary>Creates a vector that wraps the given feature array (no copy is made).</summary>
    /// <param name="features">The feature values.</param>
    public Vector(double[] features)
    {
        Features = features;
    }

    /// <summary>Formats the vector as "(f0,f1,...)".</summary>
    public override string ToString()
    {
        return "(" + string.Join(",", Features) + ")";
    }
}

/// <summary>
/// Small k-nearest-neighbours demo built on a hard-coded set of nine
/// three-dimensional samples labelled with classes 0, 1 and 2.
/// </summary>
public class KNNDemo
{
    // Number of neighbours passed to the classifier.
    private const int K = 3;

    // Number of distinct class labels present in Outputs.
    private const int TotalClasses = 3;

    private static double[][] Inputs;
    private static int[] Outputs;

    /// <summary>
    /// Populates <see cref="Inputs"/> and <see cref="Outputs"/> with the demo samples.
    /// Safe to call repeatedly: the arrays are only built once.
    /// </summary>
    private static void Training()
    {
        if (Inputs != null && Outputs != null)
        {
            return;
        }

        Inputs = new[]
        {
            // The first two are from class 0
            new double[] { -5, -2, -1 },
            new double[] { -5, -5, -6 },

            // The next four are from class 1
            new double[] {  2,  1,  1 },
            new double[] {  1,  1,  2 },
            new double[] {  1,  2,  2 },
            new double[] {  3,  1,  2 },

            // The last three are from class 2
            new double[] { 11,  5,  4 },
            new double[] { 15,  5,  6 },
            new double[] { 10,  5,  6 },
        };

        Outputs = new[]
        {
            0, 0,        // First two from class 0
            1, 1, 1, 1,  // Next four from class 1
            2, 2, 2      // Last three from class 2
        };
    }

    /// <summary>
    /// Prints every training sample together with its class label and its
    /// Euclidean distance to <paramref name="v"/>, nearest sample first.
    /// </summary>
    /// <param name="v">The query vector; must have the same dimension as the samples.</param>
    /// <exception cref="ArgumentNullException"><paramref name="v"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="v"/> has no features, or its dimension differs from the samples'.
    /// </exception>
    public static void SortDistancesThenShow(Vector v)
    {
        ValidateQuery(v);

        // The original relied on Execute() having been called first; make this
        // method usable on its own by loading the samples here as well.
        Training();

        // Anonymous types are statically typed, so no 'dynamic' is required.
        // All distances are computed (ToList) before anything is printed, so a
        // dimension mismatch fails before partial output is written.
        var ranked = Inputs
            .Select((sample, index) => new
            {
                InputStr = Stringify(sample),
                Dist = DistanceBetween(sample, v.Features),
                Class = Outputs[index]
            })
            .OrderBy(row => row.Dist)
            .ToList();

        foreach (var row in ranked)
        {
            // The format string must stay on one line: a regular C# string
            // literal cannot contain a raw line break.
            Console.WriteLine("input : {0}, class : {1} , distance : {2}", row.InputStr, row.Class, row.Dist);
        }
    }

    /// <summary>Formats an array of doubles as "(v0,v1,...)".</summary>
    private static string Stringify(double[] values)
    {
        return "(" + string.Join(",", values) + ")";
    }

    /// <summary>Computes the Euclidean distance between two equally sized vectors.</summary>
    /// <exception cref="ArgumentException">The vectors have different lengths.</exception>
    private static double DistanceBetween(double[] v1, double[] v2)
    {
        if (v1.Length != v2.Length)
        {
            throw new ArgumentException("vectors should be in same dimension.");
        }

        var sumOfSquares = 0.0;
        for (int i = 0; i < v1.Length; i++)
        {
            var diff = v1[i] - v2[i];
            sumOfSquares += diff * diff;
        }

        return Math.Sqrt(sumOfSquares);
    }

    /// <summary>Rejects a null query vector or one without a feature array.</summary>
    private static void ValidateQuery(Vector v)
    {
        if (v == null)
        {
            throw new ArgumentNullException("v");
        }

        if (v.Features == null)
        {
            throw new ArgumentException("vector has no features.", "v");
        }
    }

    /// <summary>
    /// Loads the demo samples, builds a <c>KNearestNeighbors</c> classifier from
    /// them (K neighbours, TotalClasses classes) and returns the value its
    /// <c>Compute</c> method yields for <paramref name="v"/>.
    /// </summary>
    /// <param name="v">The query vector to classify.</param>
    /// <returns>The class label reported by the classifier.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="v"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="v"/> has no features.</exception>
    public static int Execute(Vector v)
    {
        ValidateQuery(v);
        Training();

        var knn = new KNearestNeighbors(K, TotalClasses, Inputs, Outputs);
        return knn.Compute(v.Features);
    }
}

/// <summary>Console entry point for the KNN demo.</summary>
class Program
{
    /// <summary>
    /// Classifies the sample vector (11, 5, 4), prints the predicted class,
    /// then prints the distance from that vector to every training sample.
    /// </summary>
    static void Main(string[] args)
    {
        var v = new Vector(new double[] { 11, 5, 4 });
        var result = KNNDemo.Execute(v);

        Console.WriteLine("Classification for input '{0}' result is '{1}'", v, result);
        KNNDemo.SortDistancesThenShow(v);

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}





运行结果



从运行的打印结果中可以看出,预测的分类为2;同时还列出了新向量到每个训练样本的距离(已按距离从小到大排序)。

1 0