DBScan聚类算法Java实现
来源:互联网 发布:工业产品网络推广 编辑:程序博客网 时间:2024/05/29 09:25
DBScan算法流程图
算法:DBScan,基于密度的聚类算法
输入:D:一个包含n个数据的数据集;r:半径参数;minPts:邻域密度阈值
输出:基于密度的聚类集合
标记D中所有的点为unvisited
for each p in D
    if p.visit == unvisited
        标记p为visited
        找出与点p距离不大于r的所有点集合N
        If N.size() < minPts
            标记点p为噪声点
        Else
            for each p' in N
                If p'.visit == unvisited
                    标记p'为visited
                    找出与点p'距离不大于r的所有点集合N'
                    If N'.size() >= minPts
                        将集合N'加入集合N中去
                    End if
                End if
                If p'未被聚到某个簇
                    将p'聚到当前簇
                    If p'被标记为噪声点
                        将p'取消标记为噪声点
                    End If
                End If
            End for
        End if
    End if
End for
代码实现
Point.java 定义点,其中距离计算采用欧式距离
package Cluster.DBScan;/** * Created by Jason on 2016/4/17. */public class Point { private double x; private double y; private boolean isVisit; private int cluster; private boolean isNoised; public Point(double x,double y) { this.x = x; this.y = y; this.isVisit = false; this.cluster = 0; this.isNoised = false; } public double getDistance(Point point) { return Math.sqrt((x-point.x)*(x-point.x)+(y-point.y)*(y-point.y)); } public void setX(double x) { this.x = x; } public double getX() { return x; } public void setY(double y) { this.y = y; } public double getY() { return y; } public void setVisit(boolean isVisit) { this.isVisit = isVisit; } public boolean getVisit() { return isVisit; } public int getCluster() { return cluster; } public void setNoised(boolean isNoised) { this.isNoised = isNoised; } public void setCluster(int cluster) { this.cluster = cluster; } public boolean getNoised() { return this.isNoised; } @Override public String toString() { return x+" "+y+" "+cluster+" "+(isNoised?1:0); }}
DBScan.java
package Cluster.DBScan;import java.util.ArrayList;/** * Created by Jason on 2016/4/17. */public class DBScan { private double radius; private int minPts; public DBScan(double radius,int minPts) { this.radius = radius; this.minPts = minPts; } public void process(ArrayList<Point> points) { int size = points.size(); int idx = 0; int cluster = 1; while (idx<size) { Point p = points.get(idx++); //choose an unvisited point if (!p.getVisit()) { p.setVisit(true);//set visited ArrayList<Point> adjacentPoints = getAdjacentPoints(p, points); //set the point which adjacent points less than minPts noised if (adjacentPoints != null && adjacentPoints.size() < minPts) { p.setNoised(true); } else { p.setCluster(cluster); for (int i = 0; i < adjacentPoints.size(); i++) { Point adjacentPoint = adjacentPoints.get(i); //only check unvisited point, cause only unvisited have the chance to add new adjacent points if (!adjacentPoint.getVisit()) { adjacentPoint.setVisit(true); ArrayList<Point> adjacentAdjacentPoints = getAdjacentPoints(adjacentPoint, points); //add point which adjacent points not less than minPts noised if (adjacentAdjacentPoints != null && adjacentAdjacentPoints.size() >= minPts) { adjacentPoints.addAll(adjacentAdjacentPoints); } } //add point which doest not belong to any cluster if (adjacentPoint.getCluster() == 0) { adjacentPoint.setCluster(cluster); //set point which marked noised before non-noised if (adjacentPoint.getNoised()) { adjacentPoint.setNoised(false); } } } cluster++; } } } } private ArrayList<Point> getAdjacentPoints(Point centerPoint,ArrayList<Point> points) { ArrayList<Point> adjacentPoints = new ArrayList<Point>(); for (Point p:points) { //include centerPoint itself double distance = centerPoint.getDistance(p); if (distance<=radius) { adjacentPoints.add(p); } } return adjacentPoints; }}
Data.java 随机模拟产生数据
package Cluster.DBScan;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Random;

/**
 * Created by Jason on 2016/4/17.
 *
 * <p>Sample data sets for the DBScan demo and a helper that dumps points to a
 * text file, one {@code Point.toString()} per line.
 */
public class Data {
    /**
     * Rounds to at most three fraction digits. The symbols are pinned to
     * {@link Locale#ROOT} and grouping is off so that the formatted text can
     * always be parsed back by {@link Double#valueOf(String)}; a default-locale
     * formatter emits a decimal comma in many locales and makes that parse
     * throw.
     */
    private static DecimalFormat df =
            new DecimalFormat("0.###", new DecimalFormatSymbols(Locale.ROOT));

    /** Coordinates of the fixed demo data set, in insertion order. */
    private static final int[][] SPECIAL_COORDINATES = {
        {2, 2}, {3, 1}, {3, 4}, {3, 14}, {5, 3}, {8, 3}, {8, 6}, {9, 8},
        {10, 4}, {10, 7}, {10, 10}, {10, 14}, {11, 13}, {12, 7}, {12, 15},
        {14, 7}, {14, 9}, {14, 15}, {15, 8}
    };

    /**
     * Builds two curves of {@code size / 2} points each: a sine arc sampled
     * at x = pi/half * (i + 1), followed by a cosine arc sampled at
     * x = 1.5 + pi/half * (i + 1). All coordinates are rounded to three
     * fraction digits, and y is computed from the rounded x.
     *
     * @param size requested number of points; an odd value yields
     *             {@code size - 1} points because each curve gets
     *             {@code size / 2}
     * @return the generated points, sine arc first
     */
    public static ArrayList<Point> generateSinData(int size) {
        ArrayList<Point> points = new ArrayList<Point>(size);
        int half = size / 2;
        for (int i = 0; i < half; i++) {
            double x = format(Math.PI / half * (i + 1));
            double y = format(Math.sin(x));
            points.add(new Point(x, y));
        }
        for (int i = 0; i < half; i++) {
            double x = format(1.5 + Math.PI / half * (i + 1));
            double y = format(Math.cos(x));
            points.add(new Point(x, y));
        }
        return points;
    }

    /**
     * Returns the fixed 19-point demo data set.
     *
     * @return a fresh list of freshly created points
     */
    public static ArrayList<Point> generateSpecialData() {
        ArrayList<Point> points = new ArrayList<Point>(SPECIAL_COORDINATES.length);
        for (int[] xy : SPECIAL_COORDINATES) {
            points.add(new Point(xy[0], xy[1]));
        }
        return points;
    }

    /**
     * Writes each point's {@code toString()} followed by {@code "\n"} to the
     * file at {@code path}, replacing any existing content. I/O failures are
     * reported on standard error and otherwise ignored (best effort).
     *
     * @param points the points to write
     * @param path   destination file path
     */
    public static void writeData(ArrayList<Point> points, String path) {
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(path));
            try {
                for (Point point : points) {
                    bw.write(point.toString() + "\n");
                }
            } finally {
                // Close even when a write fails so the file handle is not leaked.
                bw.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Rounds {@code x} to at most three fraction digits via {@link #df}. */
    private static double format(double x) {
        return Double.valueOf(df.format(x));
    }
}
Client.java 运行聚类算法
package Cluster.DBScan;import java.util.ArrayList;/** * Created by Jason on 2016/4/17. */public class Client { public static void main(String[] args) { //ArrayList<Point> points = Data.generateSinData(200); //DBScan dbScan = new DBScan(0.6,4); ArrayList<Point> points = Data.generateSpecialData(); DBScan dbScan = new DBScan(3,3); dbScan.process(points); for (Point p:points) { System.out.println(p); } Data.writeData(points,"data.txt"); }}
效果展示
数据展示采用matlab绘制,
% Plot the clustering result written by the Java demo.
% Each row of data.txt is "x y cluster noiseFlag" (see Point.toString).
% Noise points are drawn as black stars. Every cluster gets its own dot
% colour: cluster 1 red, cluster 2 blue, further clusters cycle through the
% rest of the palette instead of being drawn as noise.
a = importdata('data.txt');
palette = 'rbgmcy';

hold on;

% Noise is identified by the flag column, not by "cluster id is not 1 or 2".
noise = a(:,4) == 1;
plot(a(noise,1), a(noise,2), 'k*');

clusterIds = unique(a(~noise,3))';
for c = clusterIds
    members = ~noise & a(:,3) == c;
    colour = palette(mod(c - 1, numel(palette)) + 1);
    plot(a(members,1), a(members,2), [colour '.']);
end

grid on;
数据1,Data.generateSinData(200),聚类效果
数据2,Data.generateSpecialData(),聚类效果
不同颜色代表不同类,*代表噪声点
所有代码下载:https://github.com/lincolnmi/algorithms/tree/master/src/Cluster/DBScan
参考链接:
http://www.cnblogs.com/aijianiula/p/4339960.html
http://www.dataguru.cn/thread-18180-1-1.html
2 0
- 聚类算法之DBScan(Java实现)
- 聚类算法之DBScan(Java实现)
- 聚类算法之DBScan(Java实现)
- DBScan聚类算法Java实现
- 聚类算法 dbscan java 实现
- DBSCAN算法 JAVA实现
- 基于密度的聚类算法(DBSCAN)的java实现
- DBSCAN算法的Java实现
- DBSCAN聚类算法C++实现
- DBSCAN聚类算法的实现
- DBSCAN聚类算法C++实现
- DBSCAN聚类算法的实现
- 聚类算法-DBSCAN-C++实现
- dbscan聚类算法的R实现
- DBSCAN聚类算法C++实现
- 聚类算法实现(二)DBSCAN
- DBSCAN聚类算法原理及其实现
- DBSCAN聚类算法原理及其实现
- Mafile补充学习1~2
- WebService学习总结(四)——调用第三方提供的webService服务
- Javascript创建对象的几种方式
- 关于javassist.NotFoundException
- 线性降维方法PCA和LDA
- DBScan聚类算法Java实现
- PTA 04-树6 Complete Binary Search Tree
- 第四周项目四(1)-程序分析
- 指针与引用
- php中require,include,use区分
- N皇后Java算法
- 慕课 springmvc拦截器
- 仿qq侧滑删除的一个自定义View,独立的,不需要依赖其他的view
- java基础之泛型