用java源代码学数据结构<六>: HashSet HashMap 详解

来源：互联网发布：金蝶软件价格编辑：程序博客网时间：2024/05/24 01:48

散列表是数据结构中的重要技术。散列表的最大优点在于查找、插入、删除的平均时间复杂度都是O(1)；缺点在于元素无序，不支持按顺序遍历或范围查找，并且在扩容时需要重新散列(rehash)，开销较大。Java中的很多集合类，如HashMap、Hashtable、HashSet，基本实现都采用了散列表技术。首先看下HashSet。

package java.util;/*1.HashSet中不允许重复元素2.HashSet中大量调用了HashMap的方法，其内部封装了一个HashMap*/public class HashSet<E>    extends AbstractSet<E>    implements Set<E>, Cloneable, java.io.Serializable{    static final long serialVersionUID = -5024744406713321676L;//hashSet内部使用HashMap来存储元素，    private transient HashMap<E,Object> map;//定义一个静态对象，作为所有key的value    private static final Object PRESENT = new Object();    public HashSet() {        map = new HashMap<>();    }    public HashSet(Collection<? extends E> c) {        map = new HashMap<>(Math.max((int) (c.size()/.75f) + 1, 16));        addAll(c);    }    public HashSet(int initialCapacity, float loadFactor) {        map = new HashMap<>(initialCapacity, loadFactor);    }    public HashSet(int initialCapacity) {        map = new HashMap<>(initialCapacity);    }    HashSet(int initialCapacity, float loadFactor, boolean dummy) {        map = new LinkedHashMap<>(initialCapacity, loadFactor);    }    public Iterator<E> iterator() {        return map.keySet().iterator();    }    public int size() {        return map.size();    }    public boolean isEmpty() {        return map.isEmpty();    }        public boolean contains(Object o) {        return map.containsKey(o);    }        public boolean add(E e) {        return map.put(e, PRESENT)==null;    }       public boolean remove(Object o) {        return map.remove(o)==PRESENT;    }        public void clear() {        map.clear();    }       public Object clone() {        try {            HashSet<E> newSet = (HashSet<E>) super.clone();            newSet.map = (HashMap<E, Object>) map.clone();            return newSet;        } catch (CloneNotSupportedException e) {            throw new InternalError();        }    }}

HashSet内部封装了一个HashMap，将要存储的对象作为每个键值对中的key，然后采用静态常量对象PRESENT作为所有key的value。在看HashMap之前，先讲讲什么叫做散列(hash)。Hash的具体含义可参见百度百科(hash)。简单来说，是指对所有的关键字，不直接采用关键字作为存储数组的下标，而是通过散列函数根据关键字计算出相应的下标。hash的关键技术在于如何产生合适的hashCode，以及如何解决冲突(多个key映射到同一个位置上)。hash表在查找方面平均只需要O(1)的时间，也就是一找就到的节奏。再来看HashMap的内部实现。

package java.util;import java.io.*;/*HashMap是线程不同步的，可以进行封装Map m = Collections.synchronizedMap(new HashMap(...));*/public class HashMap<K,V>    extends AbstractMap<K,V>    implements Map<K,V>, Cloneable, Serializable{/*HashMap 的实例有两个参数影响其性能：初始容量 和加载因子。容量是哈希表中桶的数量，初始容量只是哈希表在创建时的容量。加载因子是哈希表在其容量自动增加之前可以达到多满的一种尺度。当哈希表中的条目数超出了加载因子与当前容量的乘积时，则要对该哈希表进行 rehash 操作（即重建内部数据结构），从而哈希表将具有大约两倍的桶数。在Java编程语言中，加载因子默认值为0.75，默认哈希表元为101*///初始化容量    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16//最大容量    static final int MAXIMUM_CAPACITY = 1 << 30;//加载因子    static final float DEFAULT_LOAD_FACTOR = 0.75f;//用来存储键值对的Entry数组,用于设置刚刚初始化的HashMap对象,用来减少存储空间    static final Entry<?,?>[] EMPTY_TABLE = {};    //大小必须是2的倍数    transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;//存储的键值对的数目    transient int size;//阈值，当size超过threshold时，table将会扩容.//threshold = capacity * loadFactor    int threshold;//加载因子        final float loadFactor;//修改次数，用于检查线程是否同步    transient int modCount;   //默认的阀值    static final int ALTERNATIVE_HASHING_THRESHOLD_DEFAULT = Integer.MAX_VALUE;       private static class Holder {                static final int ALTERNATIVE_HASHING_THRESHOLD;        static {//获取jdk内置的阀值            String altThreshold = java.security.AccessController.doPrivileged(                new sun.security.action.GetPropertyAction(                    "jdk.map.althashing.threshold"));            int threshold;            try {//设置当前阀值                threshold = (null != altThreshold)                        ? 
Integer.parseInt(altThreshold)                        : ALTERNATIVE_HASHING_THRESHOLD_DEFAULT;                // disable alternative hashing if -1                if (threshold == -1) {                    threshold = Integer.MAX_VALUE;                }                if (threshold < 0) {                    throw new IllegalArgumentException("value must be positive integer.");                }            } catch(IllegalArgumentException failed) {                throw new Error("Illegal value for 'jdk.map.althashing.threshold'", failed);            }            ALTERNATIVE_HASHING_THRESHOLD = threshold;        }    }    //使用初始化容量和加载因子初始化HashMap    public HashMap(int initialCapacity, float loadFactor) {        if (initialCapacity < 0)            throw new IllegalArgumentException("Illegal initial capacity: " +                                               initialCapacity);        if (initialCapacity > MAXIMUM_CAPACITY)            initialCapacity = MAXIMUM_CAPACITY;        if (loadFactor <= 0 || Float.isNaN(loadFactor))            throw new IllegalArgumentException("Illegal load factor: " +                                               loadFactor);        this.loadFactor = loadFactor;        threshold = initialCapacity;        init();    }       public HashMap(int initialCapacity) {        this(initialCapacity, DEFAULT_LOAD_FACTOR);    }        public HashMap() {        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);    }    /* * Constructs a new HashMap with the same mappings as the     * specified Map.  The HashMap is created with     * default load factor (0.75) and an initial capacity sufficient to     * hold the mappings in the specified Map.*/    public HashMap(Map<? extends K, ? 
extends V> m) {        this(Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1,                      DEFAULT_INITIAL_CAPACITY), DEFAULT_LOAD_FACTOR);        inflateTable(threshold);        putAllForCreate(m);    }/**     * A randomizing value associated with this instance that is applied to     * hash code of keys to make hash collisions harder to find. If 0 then alternative hashing is disabled.     */    transient int hashSeed = 0;//工具函数,将number扩展成2的倍数    private static int roundUpToPowerOf2(int number) {        // assert number >= 0 : "number must be non-negative";        int rounded = number >= MAXIMUM_CAPACITY                ? MAXIMUM_CAPACITY                : (rounded = Integer.highestOneBit(number)) != 0                    ? (Integer.bitCount(number) > 1) ? rounded << 1 : rounded                    : 1;        return rounded;    }    //将表格大小扩展到toSize    private void inflateTable(int toSize) {        // Find a power of 2 >= toSize        int capacity = roundUpToPowerOf2(toSize);//重新设置阀值        threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);        //重新设置tabletable = new Entry[capacity];//根据capacity初始化hashSeed        initHashSeedAsNeeded(capacity);    }    // internal utilities    void init() {    }    /**     * Initialize the hashing mask value. We defer initialization until we     * really need it.     */    final boolean initHashSeedAsNeeded(int capacity) {        boolean currentAltHashing = hashSeed != 0;//根据系统函数得到一个hash        boolean useAltHashing = sun.misc.VM.isBooted() &&                (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);        boolean switching = currentAltHashing ^ useAltHashing;//如果hashSeed初始化为0则跳过switching//否则使用系统函数得到新的hashSeed        if (switching) {            hashSeed = useAltHashing                ? 
sun.misc.Hashing.randomHashSeed(this)                : 0;        }        return switching;    }   /*哈希算法的核心：哈希函数     * Retrieve object hash code and applies a supplemental hash function to the     * result hash, which defends against poor quality hash functions.  This is     * critical because HashMap uses power-of-two length hash tables, that     * otherwise encounter collisions for hashCodes that do not differ     * in lower bits. Note: Null keys always map to hash 0, thus index 0.     */*/    final int hash(Object k) {        int h = hashSeed;//通过hashSeed初始化的值的不同来选择不同的hash方式        if (0 != h && k instanceof String) {            return sun.misc.Hashing.stringHash32((String) k);        }        h ^= k.hashCode();                h ^= (h >>> 20) ^ (h >>> 12);        return h ^ (h >>> 7) ^ (h >>> 4);    }    //Returns index for hash code h.通过得到的hash值来确定它在table中的位置    static int indexFor(int h, int length) {        // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2";        return h & (length-1);    }        public int size() {        return size;    }       public boolean isEmpty() {        return size == 0;    }        public V get(Object key) {        if (key == null)            return getForNullKey();        Entry<K,V> entry = getEntry(key);//查看调用函数，在下面        return null == entry ? null : entry.getValue();    }        private V getForNullKey() {        if (size == 0) {            return null;        }        for (Entry<K,V> e = table[0]; e != null; e = e.next) {            if (e.key == null)                return e.value;        }        return null;    }        public boolean containsKey(Object key) {        return getEntry(key) != null;    }    final Entry<K,V> getEntry(Object key) {        if (size == 0) {            return null;        }//通过key的hash值确定table下标（null对应下标0）        int hash = (key == null) ? 
0 : hash(key);//indexFor() = h & (length-1) = hash&(table.length-1)        for (Entry<K,V> e = table[indexFor(hash, table.length)];             e != null;             e = e.next)//对冲突的处理办法是将线性探查，即将元素放到冲突位置的下一个可用位置上{            Object k;/*注意：因为元素可能不是刚好存在它对应hash值得下一个位置(如果该位置之前有元素，则要放在下两个的位置，以此类推)*/            if (e.hash == hash &&                ((k = e.key) == key || (key != null && key.equals(k))))//所以不仅要判断hash还要判断key（因为不同的key可能有相同的hash值）                return e;        }        return null;    }     /* * 1. 通过key的hash值确定table下标     * 2. 查找table下标，如果key存在则更新对应的value     * 3. 如果key不存在则调用addEntry()方法 */    public V put(K key, V value) {        if (table == EMPTY_TABLE) {//初始化存储表空间            inflateTable(threshold);        }        if (key == null)            return putForNullKey(value);        int hash = hash(key);        int i = indexFor(hash, table.length);/*注意：我不断的寻找，hash值对应位置之后的可用位置在哪里*/        for (Entry<K,V> e = table[i]; e != null; e = e.next) {            Object k;            if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {                V oldValue = e.value;                e.value = value;                e.recordAccess(this);                return oldValue;            }        }//上面的循环结束表示当前的key不存在与表中，需要另外增加        modCount++;        addEntry(hash, key, value, i);//函数在下面        return null;    }/*为减少篇幅，删除了一些功能实现类似的方法大家可以自行阅读分析*/ /**     * Transfers all entries from current table to newTable.     */    void transfer(Entry[] newTable, boolean rehash) {        int newCapacity = newTable.length;        for (Entry<K,V> e : table) {            while(null != e) {                Entry<K,V> next = e.next;//是否重新进行hash计算                if (rehash) {                    e.hash = null == e.key ? 
0 : hash(e.key);                }                int i = indexFor(e.hash, newCapacity);                e.next = newTable[i];                newTable[i] = e;                e = next;            }        }    }//扩展到指定的大小    void resize(int newCapacity) {        Entry[] oldTable = table;        int oldCapacity = oldTable.length;        if (oldCapacity == MAXIMUM_CAPACITY) {            threshold = Integer.MAX_VALUE;            return;        }        Entry[] newTable = new Entry[newCapacity];//重新hash        transfer(newTable, initHashSeedAsNeeded(newCapacity));        table = newTable;        threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);    }//Entry类就是一个简单的键值对的类    static class Entry<K,V> implements Map.Entry<K,V> {        final K key;        V value;        Entry<K,V> next;//这是一种类似指针的东西        int hash;//还要存放hash值/*下面是一些十分基本的构造函数以及get,set方法*/        Entry(int h, K k, V v, Entry<K,V> n) {            value = v;            next = n;            key = k;            hash = h;        }        public final K getKey() {            return key;        }        public final V getValue() {            return value;        }        public final V setValue(V newValue) {            V oldValue = value;            value = newValue;            return oldValue;        }//必须要key和value都一样才equals        public final boolean equals(Object o) {            if (!(o instanceof Map.Entry))                return false;            Map.Entry e = (Map.Entry)o;            Object k1 = getKey();            Object k2 = e.getKey();            if (k1 == k2 || (k1 != null && k1.equals(k2))) {                Object v1 = getValue();                Object v2 = e.getValue();                if (v1 == v2 || (v1 != null && v1.equals(v2)))                    return true;            }            return false;        }        public final int hashCode() {            return Objects.hashCode(getKey()) ^ Objects.hashCode(getValue());        }        public final String toString() {            
return getKey() + "=" + getValue();        }        /**         * This method is invoked whenever the value in an entry is         * overwritten by an invocation of put(k,v) for a key k that's already         * in the HashMap.         */        void recordAccess(HashMap<K,V> m) {        }        /**         * This method is invoked whenever the entry is         * removed from the table.         */        void recordRemoval(HashMap<K,V> m) {        }    } //根据需要，可能要扩容 //由于它由Put函数调用，调用之前已经确定表中没有key的记录 //addEntry默认当前表中没有指定key的记录，直接增加记录    void addEntry(int hash, K key, V value, int bucketIndex) {//计算存放位置        if ((size >= threshold) && (null != table[bucketIndex])) {            resize(2 * table.length);//将容量翻倍            hash = (null != key) ? hash(key) : 0;//寻找指定hash值对应的存放位置            bucketIndex = indexFor(hash, table.length);        }        createEntry(hash, key, value, bucketIndex);    }     //由于默认没有key的记录，所以直接增加    void createEntry(int hash, K key, V value, int bucketIndex) {        Entry<K,V> e = table[bucketIndex];        table[bucketIndex] = new Entry<>(hash, key, value, e);        size++;    }//类似于Entry数组的迭代器，主要是对table进行操作    private abstract class HashIterator<E> implements Iterator<E> {        Entry<K,V> next;        // next entry to return        int expectedModCount;   // For fast-fail        int index;              // current slot        Entry<K,V> current;     // current entry        HashIterator() {            expectedModCount = modCount;            if (size > 0) { // advance to first entry                Entry[] t = table;                while (index < t.length && (next = t[index++]) == null)                    ;            }        }        public final boolean hasNext() {            return next != null;        }        final Entry<K,V> nextEntry() {            if (modCount != expectedModCount)                throw new ConcurrentModificationException();            Entry<K,V> e = next;            if (e == null)                throw new 
NoSuchElementException();            if ((next = e.next) == null) {                Entry[] t = table;                while (index < t.length && (next = t[index++]) == null)                    ;            }            current = e;            return e;        }        public void remove() {            if (current == null)                throw new IllegalStateException();            if (modCount != expectedModCount)                throw new ConcurrentModificationException();            Object k = current.key;            current = null;            HashMap.this.removeEntryForKey(k);            expectedModCount = modCount;        }    }    private final class ValueIterator extends HashIterator<V> {        public V next() {            return nextEntry().value;        }    }    private final class KeyIterator extends HashIterator<K> {        public K next() {            return nextEntry().getKey();        }    }    private final class EntryIterator extends HashIterator<Map.Entry<K,V>> {        public Map.Entry<K,V> next() {            return nextEntry();        }    }    // Subclass overrides these to alter behavior of views' iterator() method    Iterator<K> newKeyIterator()   {        return new KeyIterator();    }    Iterator<V> newValueIterator()   {        return new ValueIterator();    }    Iterator<Map.Entry<K,V>> newEntryIterator()   {        return new EntryIterator();    }    // Views    private transient Set<Map.Entry<K,V>> entrySet = null;    /**     * Returns a link Set view of the keys contained in this map.     */    public Set<K> keySet() {        Set<K> ks = keySet;        return (ks != null ? 
ks : (keySet = new KeySet()));    }    private final class KeySet extends AbstractSet<K> {        public Iterator<K> iterator() {            return newKeyIterator();        }        public int size() {            return size;        }        public boolean contains(Object o) {            return containsKey(o);        }        public boolean remove(Object o) {            return HashMap.this.removeEntryForKey(o) != null;        }        public void clear() {            HashMap.this.clear();        }    }    /**     * Returns a Collection view of the values contained in this map.     */    public Collection<V> values() {        Collection<V> vs = values;        return (vs != null ? vs : (values = new Values()));    }    private final class Values extends AbstractCollection<V> {        public Iterator<V> iterator() {            return newValueIterator();        }        public int size() {            return size;        }        public boolean contains(Object o) {            return containsValue(o);        }        public void clear() {            HashMap.this.clear();        }    }    /**return a set view of the mappings contained in this map     */    public Set<Map.Entry<K,V>> entrySet() {        return entrySet0();    }    private Set<Map.Entry<K,V>> entrySet0() {        Set<Map.Entry<K,V>> es = entrySet;        return es != null ? 
es : (entrySet = new EntrySet());    }    private final class EntrySet extends AbstractSet<Map.Entry<K,V>> {        public Iterator<Map.Entry<K,V>> iterator() {            return newEntryIterator();        }        public boolean contains(Object o) {            if (!(o instanceof Map.Entry))                return false;            Map.Entry<K,V> e = (Map.Entry<K,V>) o;            Entry<K,V> candidate = getEntry(e.getKey());            return candidate != null && candidate.equals(e);        }        public boolean remove(Object o) {            return removeMapping(o) != null;        }        public int size() {            return size;        }        public void clear() {            HashMap.this.clear();        }    }    private static final long serialVersionUID = 362498820763181265L;    // These methods are used when serializing HashSets    int   capacity()     { return table.length; }    float loadFactor()   { return loadFactor;   }}

个人体会：

1.hash算法：先取hashSeed作为初值（默认为0，只有当容量达到jdk.map.althashing.threshold指定的阈值时才会由系统生成随机种子），与key的hashCode()异或后，再经过若干次无符号右移和异或运算，把高位的信息混入低位，争取让元素在table中均匀分散开。

2.冲突(collision)解决的办法是链地址法(拉链法)，而不是线性探查：table的每个位置(桶)存放一条由Entry.next串起来的单向链表，映射到同一下标的元素都挂在这条链表上，新元素插入到链表头部。所以在put、get的时候都要遍历对应桶的链表，通过比较hash值和key来确定具体的查找、删除、修改位置。

3.对于hashSeed的分析，由于系统函数较多，真的不够清晰，请多多指教

一起学习，一起进步，欢迎访问我的博客：http://blog.csdn.net/wanghao109