DBScan聚类算法Java实现

来源:互联网 发布:工业产品网络推广 编辑:程序博客网 时间:2024/05/29 09:25

DBScan算法流程图

算法:DBScan,基于密度的聚类算法
输入:
   D:一个包含n个数据的数据集
   r:半径参数
   minPts:邻域密度阈值
输出:基于密度的聚类集合
标记D中所有的点为unvisited
for each p in D
    if p.visit == unvisited
        标记p为visited
        找出与点p距离不大于r的所有点集合N(包含p自身)
        If N.size() < minPts
            标记点p为噪声点
        Else
            新建一个簇作为当前簇
            for each p' in N
                If p'.visit == unvisited
                    标记p'为visited
                    找出与点p'距离不大于r的所有点集合N'
                    If N'.size() >= minPts
                        将集合N'加入集合N中去
                    End if
                End if
                If p'未被聚到某个簇
                    将p'聚到当前簇
                    If p'被标记为噪声点
                        将p'取消标记为噪声点
                    End If
                End If
            End for
        End if
    End if
End for

代码实现

Point.java 定义点,其中距离计算采用欧氏距离

package Cluster.DBScan;/** * Created by Jason on 2016/4/17. */public class Point {    private double x;    private double y;    private boolean isVisit;    private int cluster;    private boolean isNoised;    public Point(double x,double y) {        this.x = x;        this.y = y;        this.isVisit = false;        this.cluster = 0;        this.isNoised = false;    }    public double getDistance(Point point) {        return Math.sqrt((x-point.x)*(x-point.x)+(y-point.y)*(y-point.y));    }    public void setX(double x) {        this.x = x;    }    public double getX() {        return x;    }    public void setY(double y) {        this.y = y;    }    public double getY() {        return y;    }    public void setVisit(boolean isVisit) {        this.isVisit = isVisit;    }    public boolean getVisit() {        return isVisit;    }    public int getCluster() {        return cluster;    }    public void setNoised(boolean isNoised) {        this.isNoised = isNoised;    }    public void setCluster(int cluster) {        this.cluster = cluster;    }    public boolean getNoised() {        return this.isNoised;    }    @Override    public String toString() {        return x+" "+y+" "+cluster+" "+(isNoised?1:0);    }}

DBScan.java

package Cluster.DBScan;

import java.util.ArrayList;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

/**
 * Created by Jason on 2016/4/17.
 *
 * Density-based clustering (DBSCAN) over a list of {@link Point}s. The result is
 * written back onto the points themselves: every clustered point receives a
 * cluster id starting at 1, and points that end up in no cluster keep cluster
 * id 0 and are flagged as noise.
 */
public class DBScan {
    /** Neighbourhood radius: two points are adjacent when their distance is at most this value. */
    private final double radius;
    /** Minimum neighbourhood size (the point itself included) for a point to seed or extend a cluster. */
    private final int minPts;

    /**
     * @param radius neighbourhood radius
     * @param minPts minimum number of points (the centre included) within {@code radius}
     *               for a point to be treated as a core point
     */
    public DBScan(double radius, int minPts) {
        this.radius = radius;
        this.minPts = minPts;
    }

    /**
     * Clusters the given points in place.
     *
     * <p>Points are scanned in list order. Each unvisited point is marked visited;
     * if its neighbourhood holds fewer than {@code minPts} points it is flagged as
     * noise, otherwise it starts a new cluster that is grown through every
     * neighbour whose own neighbourhood is large enough. A point flagged as noise
     * earlier is un-flagged when a later cluster absorbs it.
     *
     * <p>The method only reads and writes the visit / cluster / noise state of the
     * points; it does not reset state left over from an earlier run.
     *
     * @param points the points to cluster; the list itself is not modified
     */
    public void process(ArrayList<Point> points) {
        int cluster = 1;
        for (Point p : points) {
            if (p.getVisit()) {
                continue;
            }
            p.setVisit(true);

            ArrayList<Point> seeds = getAdjacentPoints(p, points);
            if (seeds.size() < minPts) {
                // Too sparse to start a cluster; a later cluster may still absorb this point.
                p.setNoised(true);
                continue;
            }

            p.setCluster(cluster);
            expandCluster(seeds, points, cluster);
            cluster++;
        }
    }

    /**
     * Grows one cluster from its seed list. {@code seeds} doubles as the work queue:
     * neighbours of newly discovered core points are appended while it is being
     * walked by index.
     */
    private void expandCluster(ArrayList<Point> seeds, ArrayList<Point> points, int cluster) {
        // Tracks (by identity) which points are already queued, so a point is appended
        // at most once instead of once per core point that can reach it. Re-queued
        // points would already be visited and labelled, so skipping them does not
        // change the labelling; it only stops the queue from growing quadratically.
        Set<Point> queued = Collections.newSetFromMap(new IdentityHashMap<Point, Boolean>());
        queued.addAll(seeds);

        for (int i = 0; i < seeds.size(); i++) {
            Point candidate = seeds.get(i);

            // Only unvisited points can contribute new neighbours.
            if (!candidate.getVisit()) {
                candidate.setVisit(true);
                ArrayList<Point> reachable = getAdjacentPoints(candidate, points);
                if (reachable.size() >= minPts) {
                    for (Point q : reachable) {
                        if (queued.add(q)) {
                            seeds.add(q);
                        }
                    }
                }
            }

            // Claim points that belong to no cluster yet, clearing an earlier noise verdict.
            if (candidate.getCluster() == 0) {
                candidate.setCluster(cluster);
                if (candidate.getNoised()) {
                    candidate.setNoised(false);
                }
            }
        }
    }

    /**
     * Collects every point whose distance to {@code centerPoint} is at most
     * {@code radius}. When {@code centerPoint} is itself in {@code points} it is
     * part of the result (its distance to itself is 0).
     *
     * @return a new list, never {@code null}
     */
    private ArrayList<Point> getAdjacentPoints(Point centerPoint, ArrayList<Point> points) {
        ArrayList<Point> adjacentPoints = new ArrayList<Point>();
        for (Point p : points) {
            double distance = centerPoint.getDistance(p);
            if (distance <= radius) {
                adjacentPoints.add(p);
            }
        }
        return adjacentPoints;
    }
}

Data.java 随机模拟产生数据

package Cluster.DBScan;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Random;

/**
 * Created by Jason on 2016/4/17.
 *
 * Sample data sets for the clustering demo, plus a helper that dumps points to a
 * text file.
 */
public class Data {
    /**
     * Rounds to at most three fraction digits. The symbols are pinned to
     * {@link Locale#ROOT} and grouping is off so that the formatted text can always
     * be parsed back as a double: a default-locale format may emit a comma decimal
     * separator or grouping separators, which cannot be parsed.
     */
    private static final NumberFormat df =
            new DecimalFormat("0.###", DecimalFormatSymbols.getInstance(Locale.ROOT));

    /**
     * Builds two curve-shaped point sets of {@code size / 2} points each: the first
     * follows {@code y = sin(x)} for x stepping up to PI, the second follows
     * {@code y = cos(x)} with x shifted by 1.5. Coordinates are rounded to three
     * fraction digits.
     *
     * @param size total number of points requested; an odd value yields {@code size - 1} points
     * @return the generated points, sine curve first
     */
    public static ArrayList<Point> generateSinData(int size) {
        ArrayList<Point> points = new ArrayList<Point>(size);
        int half = size / 2;
        for (int i = 0; i < half; i++) {
            double x = format(Math.PI / half * (i + 1));
            double y = format(Math.sin(x));
            points.add(new Point(x, y));
        }
        for (int i = 0; i < half; i++) {
            double x = format(1.5 + Math.PI / half * (i + 1));
            double y = format(Math.cos(x));
            points.add(new Point(x, y));
        }
        return points;
    }

    /**
     * Returns a fixed, hand-picked set of 19 points.
     */
    public static ArrayList<Point> generateSpecialData() {
        ArrayList<Point> points = new ArrayList<Point>();
        points.add(new Point(2, 2));
        points.add(new Point(3, 1));
        points.add(new Point(3, 4));
        points.add(new Point(3, 14));
        points.add(new Point(5, 3));
        points.add(new Point(8, 3));
        points.add(new Point(8, 6));
        points.add(new Point(9, 8));
        points.add(new Point(10, 4));
        points.add(new Point(10, 7));
        points.add(new Point(10, 10));
        points.add(new Point(10, 14));
        points.add(new Point(11, 13));
        points.add(new Point(12, 7));
        points.add(new Point(12, 15));
        points.add(new Point(14, 7));
        points.add(new Point(14, 9));
        points.add(new Point(14, 15));
        points.add(new Point(15, 8));
        return points;
    }

    /**
     * Writes one line per point ({@code point.toString()} followed by a newline) to
     * the file at {@code path}, replacing any existing content. An I/O failure is
     * reported on standard error and not propagated.
     *
     * @param points the points to write
     * @param path   destination file path
     */
    public static void writeData(ArrayList<Point> points, String path) {
        // try-with-resources closes the writer even when a write fails part-way.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(path))) {
            for (Point point : points) {
                bw.write(point.toString() + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Rounds {@code x} to at most three fraction digits via a format / parse round trip. */
    private static double format(double x) {
        return Double.parseDouble(df.format(x));
    }
}

Client.java 运行聚类算法

package Cluster.DBScan;

import java.util.ArrayList;

/**
 * Created by Jason on 2016/4/17.
 *
 * Demo driver: clusters the hand-picked data set, prints every point to standard
 * output and writes the same lines to {@code data.txt}.
 */
public class Client {
    public static void main(String[] args) {
        // Alternative run on the sine/cosine data set:
        //ArrayList<Point> points = Data.generateSinData(200);
        //DBScan dbScan = new DBScan(0.6,4);
        final ArrayList<Point> samples = Data.generateSpecialData();

        // radius 3, minPts 3
        new DBScan(3, 3).process(samples);

        for (int i = 0; i < samples.size(); i++) {
            System.out.println(samples.get(i));
        }
        Data.writeData(samples, "data.txt");
    }
}

效果展示

数据展示采用 MATLAB 绘制,脚本如下:

% Plot the clustering result stored in data.txt.
% Each row is "x y cluster noised"; only the first three columns are used here.
a = importdata('data.txt');
m = size(a, 1);
colors = 'rbgmcy';   % cluster 1 -> red, 2 -> blue, ...; reused cyclically for higher ids
for i = 1:m
    if a(i,3) < 1
        % Cluster id 0: the point was not assigned to any cluster (noise).
        plot(a(i,1), a(i,2), 'k*');
    else
        % Give every cluster its own colour instead of drawing ids above 2 as noise.
        c = colors(mod(a(i,3) - 1, numel(colors)) + 1);
        plot(a(i,1), a(i,2), [c '.']);
    end
    hold on;
end
grid on;

数据1,Data.generateSinData(200),聚类效果
数据1
数据2,Data.generateSpecialData(),聚类效果
这里写图片描述
不同颜色代表不同类,*代表噪声点

所有代码下载:https://github.com/lincolnmi/algorithms/tree/master/src/Cluster/DBScan

参考链接:
http://www.cnblogs.com/aijianiula/p/4339960.html
http://www.dataguru.cn/thread-18180-1-1.html

2 0
原创粉丝点击