k-means算法代码
来源:互联网 发布:新纪元软件官网 编辑:程序博客网 时间:2024/05/21 06:51
package com.nju.yzf;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class Kmeans {
/**
 * Appends {@code k} new, empty cluster buckets to the given list.
 *
 * @param helpCenterList list that receives the buckets; it is modified in place
 * @param k number of empty buckets to append; nothing is appended when {@code k <= 0}
 * @return the same {@code helpCenterList} instance that was passed in
 */
public static List<ArrayList<ArrayList<Double>>>
initHelpCenterList(List<ArrayList<ArrayList<Double>>> helpCenterList,int k){
    int remaining = k;
    while (remaining > 0) {
        ArrayList<ArrayList<Double>> bucket = new ArrayList<ArrayList<Double>>();
        helpCenterList.add(bucket);
        remaining--;
    }
    return helpCenterList;
}
/**
 * Clusters the rows of {@code wine.txt} into three groups with k-means and prints the result.
 *
 * <p>Every non-blank line of the file is parsed as comma-separated doubles. Column 0 is
 * averaged into the centers and printed, but both distance loops start at index 1 and so
 * ignore it -- presumably it is a class label; confirm against the data file.
 *
 * <p>One initial center is drawn at random from each of the row ranges [0, 59), [59, 130)
 * and [130, 178). An alternative start (not used here) is to seed each center with the mean
 * of its whole row range.
 *
 * <p>The distance is the sum over columns of {@code ((a - b) / (a + b))^2}, i.e. a relative
 * squared difference, not the plain Euclidean distance. Iteration stops once the centers no
 * longer move at all (distance between old and new centers is exactly 0).
 *
 * @param args command-line arguments; not used
 * @throws IOException if {@code wine.txt} cannot be opened or read
 * @throws IndexOutOfBoundsException if a randomly chosen seed row lies beyond the last data row
 */
public static void main(String[] args) throws IOException{
    final int k = 3;
    // Seed i is drawn from rows [helpIndex[i], helpIndex[i] + initIndex[i]).
    final int[] initIndex = {59, 71, 48};
    final int[] helpIndex = {0, 59, 130};

    // Read the raw data: one comma-separated row of doubles per line.
    List<ArrayList<Double>> dataList = new ArrayList<ArrayList<Double>>();
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("wine.txt")));
    try {
        String data;
        while ((data = br.readLine()) != null) {
            if (data.trim().length() == 0) {
                continue; // tolerate blank lines instead of failing in parseDouble
            }
            String[] fields = data.split(",");
            ArrayList<Double> row = new ArrayList<Double>(fields.length);
            for (int i = 0; i < fields.length; i++) {
                row.add(Double.parseDouble(fields[i]));
            }
            dataList.add(row);
        }
    } finally {
        br.close(); // release the file even when a line fails to parse
    }

    // Pick k random rows as the initial cluster centers.
    Random rd = new Random();
    List<ArrayList<Double>> centers = new ArrayList<ArrayList<Double>>();
    System.out.println("random centers' index");
    for (int i = 0; i < k; i++) {
        int index = rd.nextInt(initIndex[i]) + helpIndex[i];
        System.out.println("index " + index);
        if (index >= dataList.size()) {
            throw new IndexOutOfBoundsException("seed row " + index
                    + " requested but wine.txt holds only " + dataList.size() + " data rows");
        }
        centers.add(dataList.get(index));
    }

    // Print the k initial centers.
    System.out.println("original centers:");
    for (int i = 0; i < k; i++) {
        System.out.println(centers.get(i));
    }

    final int columns = centers.get(0).size();
    List<ArrayList<Double>> newCenters = new ArrayList<ArrayList<Double>>();
    List<ArrayList<ArrayList<Double>>> helpCenterList =
            initHelpCenterList(new ArrayList<ArrayList<ArrayList<Double>>>(), k);

    // Iterate until the cluster centers are stable.
    while (true) {
        // Assignment step: put every row into the bucket of its nearest center.
        for (int i = 0; i < dataList.size(); i++) {
            ArrayList<Double> row = dataList.get(i);
            // Start at bucket 0 with an infinite distance so that a row whose distances
            // are all NaN/infinite still lands in a valid bucket instead of index -1.
            int centerIndex = 0;
            double minDistance = Double.POSITIVE_INFINITY;
            for (int j = 0; j < k; j++) {
                double currentDistance = 0;
                for (int t = 1; t < columns; t++) {
                    currentDistance += relativeSquaredTerm(centers.get(j).get(t), row.get(t));
                }
                if (minDistance > currentDistance) {
                    minDistance = currentDistance;
                    centerIndex = j;
                }
            }
            helpCenterList.get(centerIndex).add(row);
        }

        // Update step: each new center is the column-wise mean of its bucket.
        for (int i = 0; i < k; i++) {
            ArrayList<ArrayList<Double>> members = helpCenterList.get(i);
            if (members.isEmpty()) {
                // An empty bucket has no mean (0.0/0 would be NaN and the loop could
                // never converge), so the previous center is kept unchanged.
                newCenters.add(centers.get(i));
                continue;
            }
            ArrayList<Double> mean = new ArrayList<Double>(columns);
            for (int j = 0; j < columns; j++) {
                double sum = 0;
                for (int t = 0; t < members.size(); t++) {
                    sum += members.get(t).get(j);
                }
                mean.add(sum / members.size());
            }
            newCenters.add(mean);
        }
        System.out.println("\nnew clusters' centers:\n");
        for (int i = 0; i < k; i++) {
            System.out.println(newCenters.get(i));
        }

        // Total movement between the old and the new centers.
        double distance = 0;
        for (int i = 0; i < k; i++) {
            for (int j = 1; j < columns; j++) {
                distance += relativeSquaredTerm(centers.get(i).get(j), newCenters.get(i).get(j));
            }
        }
        System.out.println("\ndistance: " + distance + "\n\n");
        if (distance == 0) {
            break; // centers did not move: the clustering is stable
        }
        // Otherwise the new centers replace the old ones for the next round.
        centers = new ArrayList<ArrayList<Double>>(newCenters);
        newCenters = new ArrayList<ArrayList<Double>>();
        helpCenterList = initHelpCenterList(new ArrayList<ArrayList<ArrayList<Double>>>(), k);
    }

    // Print the final clustering.
    for (int i = 0; i < k; i++) {
        System.out.println("\n\nCluster: " + (i + 1) + " size: " + helpCenterList.get(i).size() + " :\n\n");
        for (int j = 0; j < helpCenterList.get(i).size(); j++) {
            System.out.println(helpCenterList.get(i).get(j));
        }
    }
}

/**
 * Returns {@code ((x - y) / (x + y))^2}, the per-column term of the distance used above.
 * Equal values contribute 0; without this guard two zeros would give 0/0 = NaN.
 */
private static double relativeSquaredTerm(double x, double y) {
    if (x == y) {
        return 0;
    }
    double ratio = (x - y) / (x + y);
    return ratio * ratio;
}
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Command-line k-means demo: clusters the rows of {@code wine.txt} into three groups and
 * prints the centers of every iteration followed by the final clusters.
 */
public class Kmeans {
    /**
     * Appends {@code k} new, empty cluster buckets to the given list.
     *
     * @param helpCenterList list that receives the buckets; it is modified in place
     * @param k number of empty buckets to append; nothing is appended when {@code k <= 0}
     * @return the same {@code helpCenterList} instance that was passed in
     */
    public static List<ArrayList<ArrayList<Double>>>
    initHelpCenterList(List<ArrayList<ArrayList<Double>>> helpCenterList,int k){
        for (int i = 0; i < k; i++) {
            helpCenterList.add(new ArrayList<ArrayList<Double>>());
        }
        return helpCenterList;
    }

    /**
     * Clusters the rows of {@code wine.txt} into three groups with k-means and prints the result.
     *
     * <p>Every non-blank line of the file is parsed as comma-separated doubles. Column 0 is
     * averaged into the centers and printed, but both distance loops start at index 1 and so
     * ignore it -- presumably it is a class label; confirm against the data file.
     *
     * <p>One initial center is drawn at random from each of the row ranges [0, 59), [59, 130)
     * and [130, 178). An alternative start (not used here) is to seed each center with the
     * mean of its whole row range.
     *
     * <p>The distance is the sum over columns of {@code ((a - b) / (a + b))^2}, i.e. a relative
     * squared difference, not the plain Euclidean distance. Iteration stops once the centers
     * no longer move at all (distance between old and new centers is exactly 0).
     *
     * @param args command-line arguments; not used
     * @throws IOException if {@code wine.txt} cannot be opened or read
     * @throws IndexOutOfBoundsException if a randomly chosen seed row lies beyond the last data row
     */
    public static void main(String[] args) throws IOException{
        final int k = 3;
        // Seed i is drawn from rows [helpIndex[i], helpIndex[i] + initIndex[i]).
        final int[] initIndex = {59, 71, 48};
        final int[] helpIndex = {0, 59, 130};

        // Read the raw data: one comma-separated row of doubles per line.
        List<ArrayList<Double>> dataList = new ArrayList<ArrayList<Double>>();
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("wine.txt")));
        try {
            String data;
            while ((data = br.readLine()) != null) {
                if (data.trim().length() == 0) {
                    continue; // tolerate blank lines instead of failing in parseDouble
                }
                String[] fields = data.split(",");
                ArrayList<Double> row = new ArrayList<Double>(fields.length);
                for (int i = 0; i < fields.length; i++) {
                    row.add(Double.parseDouble(fields[i]));
                }
                dataList.add(row);
            }
        } finally {
            br.close(); // release the file even when a line fails to parse
        }

        // Pick k random rows as the initial cluster centers.
        Random rd = new Random();
        List<ArrayList<Double>> centers = new ArrayList<ArrayList<Double>>();
        System.out.println("random centers' index");
        for (int i = 0; i < k; i++) {
            int index = rd.nextInt(initIndex[i]) + helpIndex[i];
            System.out.println("index " + index);
            if (index >= dataList.size()) {
                throw new IndexOutOfBoundsException("seed row " + index
                        + " requested but wine.txt holds only " + dataList.size() + " data rows");
            }
            centers.add(dataList.get(index));
        }

        // Print the k initial centers.
        System.out.println("original centers:");
        for (int i = 0; i < k; i++) {
            System.out.println(centers.get(i));
        }

        final int columns = centers.get(0).size();
        List<ArrayList<Double>> newCenters = new ArrayList<ArrayList<Double>>();
        List<ArrayList<ArrayList<Double>>> helpCenterList =
                initHelpCenterList(new ArrayList<ArrayList<ArrayList<Double>>>(), k);

        // Iterate until the cluster centers are stable.
        while (true) {
            // Assignment step: put every row into the bucket of its nearest center.
            for (int i = 0; i < dataList.size(); i++) {
                ArrayList<Double> row = dataList.get(i);
                // Start at bucket 0 with an infinite distance so that a row whose distances
                // are all NaN/infinite still lands in a valid bucket instead of index -1.
                int centerIndex = 0;
                double minDistance = Double.POSITIVE_INFINITY;
                for (int j = 0; j < k; j++) {
                    double currentDistance = 0;
                    for (int t = 1; t < columns; t++) {
                        currentDistance += relativeSquaredTerm(centers.get(j).get(t), row.get(t));
                    }
                    if (minDistance > currentDistance) {
                        minDistance = currentDistance;
                        centerIndex = j;
                    }
                }
                helpCenterList.get(centerIndex).add(row);
            }

            // Update step: each new center is the column-wise mean of its bucket.
            for (int i = 0; i < k; i++) {
                ArrayList<ArrayList<Double>> members = helpCenterList.get(i);
                if (members.isEmpty()) {
                    // An empty bucket has no mean (0.0/0 would be NaN and the loop could
                    // never converge), so the previous center is kept unchanged.
                    newCenters.add(centers.get(i));
                    continue;
                }
                ArrayList<Double> mean = new ArrayList<Double>(columns);
                for (int j = 0; j < columns; j++) {
                    double sum = 0;
                    for (int t = 0; t < members.size(); t++) {
                        sum += members.get(t).get(j);
                    }
                    mean.add(sum / members.size());
                }
                newCenters.add(mean);
            }
            System.out.println("\nnew clusters' centers:\n");
            for (int i = 0; i < k; i++) {
                System.out.println(newCenters.get(i));
            }

            // Total movement between the old and the new centers.
            double distance = 0;
            for (int i = 0; i < k; i++) {
                for (int j = 1; j < columns; j++) {
                    distance += relativeSquaredTerm(centers.get(i).get(j), newCenters.get(i).get(j));
                }
            }
            System.out.println("\ndistance: " + distance + "\n\n");
            if (distance == 0) {
                break; // centers did not move: the clustering is stable
            }
            // Otherwise the new centers replace the old ones for the next round.
            centers = new ArrayList<ArrayList<Double>>(newCenters);
            newCenters = new ArrayList<ArrayList<Double>>();
            helpCenterList = initHelpCenterList(new ArrayList<ArrayList<ArrayList<Double>>>(), k);
        }

        // Print the final clustering.
        for (int i = 0; i < k; i++) {
            System.out.println("\n\nCluster: " + (i + 1) + " size: " + helpCenterList.get(i).size() + " :\n\n");
            for (int j = 0; j < helpCenterList.get(i).size(); j++) {
                System.out.println(helpCenterList.get(i).get(j));
            }
        }
    }

    /**
     * Returns {@code ((x - y) / (x + y))^2}, the per-column term of the distance used by
     * {@link #main}. Equal values contribute 0; without this guard two zeros would give
     * 0/0 = NaN.
     */
    private static double relativeSquaredTerm(double x, double y) {
        if (x == y) {
            return 0;
        }
        double ratio = (x - y) / (x + y);
        return ratio * ratio;
    }
}
这是从别人那里复制(copy)来的代码,等以后有时间再自己实现一遍。
0 0
- k-means算法代码
- K-means算法JAVA代码
- K-means聚类算法Matlab代码
- K-Means算法和实现代码
- k-means算法MATLAB和opencv代码
- k-means算法MATLAB和opencv代码
- K-means、K-means ++、K-modes和K-prototype聚类算法简述 附Python代码
- 用代码聊算法之k-means算法
- K-means算法补充:K-means++
- K-Means++的代码
- K-means代码
- K-means Matlab代码
- k-means算法
- K-MEANS算法
- K-MEANS算法
- K-means算法
- k-means算法
- K-Means 算法
- Android自定义ViewGroup中LayoutParam的应用
- 使用busybox制作rootfs
- Apache DBCP连接数据库异常重连
- python 读取文件的最后一行
- 在linux中,如何增加、修改、删除、暂停和冻结用户名
- k-means算法代码
- windows下 远程DLL注入
- Windows 更新错误 8024401C -解决方法之一
- linux文件系统启动流程 ---笔记整理
- Codeforces Round #283 (Div. 2)---A. Minimum Difficulty (暴力)
- C#中线程状态的判别
- Gradle实现的两种简单的多渠道打包方法
- linux下vi命令大全
- 身为程序员的我们......