Stanford Algorithms学习：Clustering

来源：互联网发布：货到付款淘宝网商城编辑：程序博客网时间：2024/04/30 23:08

第二周的第一个编程作业，是利用贪婪算法来实现一个clustering的问题，和ML里学的unsupervised learning差不多。

Question 1

In this programming problem and the next you'll code up the clustering algorithm from lecture for computing a max-spacing k-clustering. Download the text file here. This file describes a distance function (equivalently, a complete graph with edge costs). It has the following format:

[number_of_nodes]
[edge 1 node 1] [edge 1 node 2] [edge 1 cost]
[edge 2 node 1] [edge 2 node 2] [edge 2 cost]
...
There is one edge (i,j) for each choice of 1≤i<j≤n, where n is the number of nodes. For example, the third line of the file is "1 3 5250", indicating that the distance between nodes 1 and 3 (equivalently, the cost of the edge (1,3)) is 5250. You can assume that distances are positive, but you should NOT assume that they are distinct.

Your task in this problem is to run the clustering algorithm from lecture on this data set, where the target number k of clusters is set to 4. What is the maximum spacing of a 4-clustering?

这个算法的实现和Kruskal's MST算法很像：先对所有边按cost从小到大排序，然后利用Union-Find数据结构，依次把每条边两个端点所在的cluster合并起来；区别只是不需要一直合并到只剩一个连通分量，而是在cluster的数量降到要求的k值（这里是4）时就结束循环。所求的最大spacing，就是此后扫描到的第一条两个端点分属不同cluster的边的cost。

这里的Union-Find和Edge数据结构都是Algorithms fourth edition by Sedgewick里面的：

public class UnionFind {private int[] id;//id[i] = parent of i private int[] sizes;//size of each componentprivate int count;//number of components/** * Creates an empty union-find structure with N nodes * @param N */public UnionFind(int N) {count = N;id = new int[N];sizes = new int[N];for (int i = 0; i < N; i++) {id[i] = i;sizes[i] = 1;}}public int count(){return count;}public boolean connected(int u, int v){return (find(u) == find(v));}/** * Return component identifier for component containing p * @param u * @return */private int find(int u){while (u != id[u]) {u = id[u];}return u;}public void union(int u, int v){int i = find(u);int j= find(v);if (i == j) {return;}if (sizes[i] < sizes[j]) {id[i] = j;sizes[j] += sizes[i];}else {id[j] = i;sizes[i] += sizes[j];}count--;}}

/**
 * An undirected edge between two vertices with an integer cost.
 * Edges are ordered by cost only.
 */
public class Edge implements Comparable<Edge> {
    private final int u;    // the first vertex
    private final int v;    // the other vertex
    private final int cost; // edge cost

    /**
     * @param u    the first vertex
     * @param v    the other vertex
     * @param cost edge cost
     */
    public Edge(int u, int v, int cost) {
        this.u = u;
        this.v = v;
        this.cost = cost;
    }

    /**
     * @return the cost of this edge
     */
    public int cost() {
        return cost;
    }

    /**
     * @return one vertex of the edge (the first one passed to the constructor)
     */
    public int either() {
        return u;
    }

    /**
     * Given one vertex of this edge, returns the other one.
     *
     * @param vertex one endpoint of this edge
     * @return the opposite endpoint
     * @throws IllegalArgumentException if {@code vertex} is not an endpoint of this edge
     */
    public int other(int vertex) {
        if (vertex == u) {
            return v;
        } else if (vertex == v) {
            return u;
        } else {
            // IllegalArgumentException is a RuntimeException, so callers
            // catching RuntimeException keep working.
            throw new IllegalArgumentException("Inconsistent edge");
        }
    }

    /**
     * Orders edges by ascending cost; the endpoints are not considered.
     *
     * @param arg0 the edge to compare against
     * @return a negative value, zero, or a positive value as this edge's cost
     *         is less than, equal to, or greater than {@code arg0}'s cost
     */
    @Override
    public int compareTo(Edge arg0) {
        return Integer.compare(this.cost, arg0.cost);
    }
}

import java.io.BufferedReader;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.util.ArrayList;import java.util.Collections;import java.util.Iterator;import java.util.StringTokenizer;public class Clustering {private ArrayList<Edge> edges;//stores all edgesprivate UnionFind uf;private int N;//number of edgesprivate String fileName = "/Users/Zhusong/Documents/Study/AlgorithmsDesignAndAnalysis/Assignments/Ass2/Ass2Prob1/clustering1.txt";public static void main(String[] args) {// TODO Auto-generated method stubClustering cl = new Clustering();cl.run();}private void run(){edges = new ArrayList<Edge>();readGraph();sortEdges();uf = new UnionFind(N);Iterator<Edge> iterator = edges.iterator();while(uf.count() > 4){Edge edge = iterator.next();int u = edge.either();uf.union(u, edge.other(u));}while(true){Edge edge = iterator.next();int u = edge.either();int v = edge.other(u);if (uf.connected(u, v)) {continue;}else {System.out.println(edge.cost());break;}}}/** * read in the text file and form a graph */private void readGraph() {File file = new File(fileName);try {BufferedReader rd = new BufferedReader(new FileReader(file));String line;line = rd.readLine();N = Integer.parseInt(line);while((line = rd.readLine()) != null){StringTokenizer tokenizer = new StringTokenizer(line);int u = Integer.parseInt(tokenizer.nextToken()) - 1;int v = Integer.parseInt(tokenizer.nextToken()) - 1;int cost = Integer.parseInt(tokenizer.nextToken());addEdge(u, v, cost);}rd.close();} catch (FileNotFoundException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}private void addEdge(int u, int v, int cost){Edge edge = new Edge(u, v, cost);edges.add(edge);}private void sortEdges(){Collections.sort(edges);}}