在内存中执行k-means聚类算法

来源:互联网 发布:birch算法 编辑:程序博客网 时间:2024/06/05 20:13
/**
 * @author YangXin
 * @info Tests the K-Means clustering algorithm on an in-memory set of points.
 */
package unitNine;

import java.util.ArrayList;
import java.util.List;

// NOTE(review): in some Mahout releases the concrete, instantiable Cluster class lives in
// org.apache.mahout.clustering.kmeans -- confirm this import against the Mahout version in use.
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.UncommonDistributions;
import org.apache.mahout.clustering.kmeans.KMeansClusterer;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

/**
 * Small driver that generates three groups of 2-D sample points, seeds k clusters from
 * randomly chosen points, runs the in-memory k-means clusterer and prints the centers of
 * the clusters produced by the last iteration.
 */
public class KMeansExample {

  /** Number of clusters to seed. */
  private static final int K = 3;

  /** Fourth argument passed to KMeansClusterer.clusterPoints (iteration limit). */
  private static final int MAX_ITERATIONS = 3;

  /** Fifth argument passed to KMeansClusterer.clusterPoints (distance threshold). */
  private static final double DISTANCE_THRESHOLD = 0.01;

  /**
   * Generates the sample data, clusters it and prints one line per final cluster.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    // Three groups of points with different means and spreads: (num, mx, my, sd).
    List<Vector> sampleData = new ArrayList<Vector>();
    RandomPointsUtil.generateSamples(sampleData, 400, 1, 1, 3);
    RandomPointsUtil.generateSamples(sampleData, 300, 1, 0, 0.5);
    RandomPointsUtil.generateSamples(sampleData, 300, 0, 2, 0.1);

    // Seed each cluster with one randomly chosen sample point.
    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(sampleData, K);
    List<Cluster> clusters = new ArrayList<Cluster>();
    int clusterId = 0;
    for (Vector v : randomPoints) {
      clusters.add(new Cluster(v, clusterId++, new EuclideanDistanceMeasure()));
    }

    List<List<Cluster>> finalClusters = KMeansClusterer.clusterPoints(
        sampleData, clusters, new EuclideanDistanceMeasure(), MAX_ITERATIONS, DISTANCE_THRESHOLD);

    // The last entry holds the clusters of the final iteration.
    for (Cluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
          + cluster.getCenter().asFormatString());
    }
  }
}


/**
 * @author YangXin
 * @info Utility class for generating and sampling random points.
 */
package unitNine;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.mahout.clustering.UncommonDistributions;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

/** Helpers for producing random 2-D sample points and picking a random subset of points. */
public class RandomPointsUtil {

  /**
   * Appends {@code num} two-dimensional points to {@code vectors}. Each coordinate is drawn
   * independently via {@code UncommonDistributions.rNorm}: the first from
   * {@code rNorm(mx, sd)}, the second from {@code rNorm(my, sd)}.
   *
   * @param vectors list that receives the generated points (mutated in place)
   * @param num     number of points to generate; nothing is added when {@code num <= 0}
   * @param mx      first argument to {@code rNorm} for the first coordinate
   * @param my      first argument to {@code rNorm} for the second coordinate
   * @param sd      second argument to {@code rNorm} for both coordinates
   */
  public static void generateSamples(List<Vector> vectors, int num,
                                     double mx, double my, double sd) {
    for (int i = 0; i < num; i++) {
      double x = UncommonDistributions.rNorm(mx, sd);
      double y = UncommonDistributions.rNorm(my, sd);
      vectors.add(new DenseVector(new double[] {x, y}));
    }
  }

  /**
   * Picks up to {@code k} points uniformly at random from {@code vectors} in a single pass
   * using reservoir sampling, so every input point has the same chance of being selected
   * regardless of its position in the iteration order.
   *
   * <p>The previous implementation replaced a kept point with a fixed probability of
   * 1/(k+1) for every element after the first k, which made the result depend almost
   * entirely on the last few elements of the input.
   *
   * @param vectors the points to sample from
   * @param k       number of points to choose
   * @return a new list holding {@code min(k, number of input points)} points
   * @throws IllegalArgumentException if {@code k} is negative
   */
  public static List<Vector> chooseRandomPoints(Iterable<Vector> vectors, int k) {
    if (k < 0) {
      throw new IllegalArgumentException("k must be non-negative: " + k);
    }
    List<Vector> chosenPoints = new ArrayList<Vector>(k);
    Random random = RandomUtils.getRandom();
    int seen = 0; // number of points visited so far, including the current one
    for (Vector value : vectors) {
      seen++;
      if (chosenPoints.size() < k) {
        // Fill the reservoir with the first k points.
        chosenPoints.add(value);
      } else {
        // Keep the new point with probability k/seen, overwriting a uniformly chosen slot.
        int slot = random.nextInt(seen);
        if (slot < k) {
          chosenPoints.set(slot, value);
        }
      }
    }
    return chosenPoints;
  }
}



0 0
原创粉丝点击