源码分析之-容器类-HashMap

来源：互联网发布：青铜器软件编辑：程序博客网时间：2024/05/09 08:17

一、HashMap基本原理
HashMap采用Entry数组来存储key-value对，每一个键值对组成了一个Entry实体。Entry类实际上是一个单向的链表结构，它具有next指针，可以连接下一个Entry实体，以此来解决hash冲突的问题：HashMap是按照key的hash值来计算Entry在数组中的存储位置的，如果两个key计算出的位置相同，而key本身并不相等，就把它们用链表串在同一个位置（桶）上。

二、HashMap源码分析

本文以JDK7源码分析一下HashMap的关键代码。

 private static int roundUpToPowerOf2(int number) {        // assert number >= 0 : "number must be non-negative";        return number >= MAXIMUM_CAPACITY                ? MAXIMUM_CAPACITY                : (number > 1) ? Integer.highestOneBit((number - 1) << 1) : 1;    }

Integer.highestOneBit方法的作用是只保留参数二进制表示中最高位的1，其余各位全部置为0。roundUpToPowerOf2方法的作用是取大于或等于给定值的最小的2的次幂（不超过MAXIMUM_CAPACITY）。至于这里为什么要取2的次幂，是因为如果数组长度不是2的次幂，会有如下问题：（1）浪费空间（2）降低查询效率。举例：当数组长度为15时，添加元素时h & (length-1)的计算就成了hash & 14（二进制1110），结果的最后一位永远是0，从而造成table数组中1（0001）、3（0011）、5（0101）、7（0111）、9（1001）、11（1011）等位置永远不可能存放数据，造成空间浪费；更糟的是，这种情况下数组可以使用的位置比数组长度小了很多，这意味着进一步增加了碰撞的几率，单个桶里保存的数据变多，从而减慢了查询的效率。

 /**
  * Lazily initializes (or clears) the hash seed used by {@code hash(Object)}.
  *
  * <p>Alternative hashing is wanted when the VM has finished booting and
  * {@code capacity} has reached {@code Holder.ALTERNATIVE_HASHING_THRESHOLD}.
  * If that differs from the current state (a non-zero {@code hashSeed} means
  * alternative hashing is active), the seed is either replaced with a fresh
  * random value or reset to zero.
  *
  * @param capacity the table capacity the decision is based on
  * @return {@code true} if the seed was switched on or off by this call,
  *         {@code false} if the state was already as required
  */
 final boolean initHashSeedAsNeeded(int capacity) {
     final boolean seeded = hashSeed != 0;
     final boolean wantSeed = sun.misc.VM.isBooted()
             && (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);

     // Nothing to do when the current state already matches the wanted one.
     if (seeded == wantSeed) {
         return false;
     }

     if (wantSeed) {
         hashSeed = sun.misc.Hashing.randomHashSeed(this);
     } else {
         hashSeed = 0;
     }
     return true;
 }

 用指定初始容量和指定加载因子构造一个新的空哈希表。其中initHashSeedAsNeeded方法用于初始化hashSeed参数，hashSeed参与key的hash值的计算：在hash方法中它与key的hashCode进行按位异或运算。不启用替代哈希时hashSeed为0；只有在VM启动完成且容量达到Holder.ALTERNATIVE_HASHING_THRESHOLD时，它才会被设置为一个与实例相关的随机值，主要用于减少hash冲突。

    final int hash(Object k) {        int h = hashSeed;        if (0 != h && k instanceof String) {            return sun.misc.Hashing.stringHash32((String) k);        }        h ^= k.hashCode();        // This function ensures that hashCodes that differ only by        // constant multiples at each bit position have a bounded        // number of collisions (approximately 8 at default load factor).        h ^= (h >>> 20) ^ (h >>> 12);        return h ^ (h >>> 7) ^ (h >>> 4);    }

   因为length为2的整数次幂，所以length-1对应的二进制低位全为1，再与hashCode(key)做与运算，即可得到[0,length)内的索引。但是这里有个问题：如果hashCode(key)的值大于length，而且hashCode(key)的二进制低位变化不大，那么冲突就会很多。造成冲突的关键原因在于length（例如默认的16）限制了只能用低位来计算索引，高位被直接舍弃了，所以我们需要额外的哈希函数，而不只是简单地使用对象的hashCode方法。具体来说，这就是HashMap中hash函数干的事了。

  /**     * 返回hashcode=h的索引位置.     */    static int indexFor(int h, int length) {        // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2";        return h & (length-1);    }    /**     * 根据key查找entry，从代码可以看出key=null是合法的     */    final Entry<K,V> getEntry(Object key) {        if (size == 0) {            return null;        }        int hash = (key == null) ? 0 : hash(key);        for (Entry<K,V> e = table[indexFor(hash, table.length)];             e != null;             e = e.next) {            Object k;            if (e.hash == hash &&                ((k = e.key) == key || (key != null && key.equals(k))))                return e;        }        return null;    }

    /**     *      */    public V put(K key, V value) {        //判断数组table是否为空        if (table == EMPTY_TABLE) {            inflateTable(threshold);        }        //如果key为空调用putForNullKey方法添加键为null的entry        if (key == null)            return putForNullKey(value);        //得到key的hash值        int hash = hash(key);        //根据上面得到的hash值得到应该放到table中的位置        int i = indexFor(hash, table.length);        //遍历i位置的链表        for (Entry<K,V> e = table[i]; e != null; e = e.next) {            Object k;            //判断i位置是否存在相同的key的entry，存在则用新值替换老的值            if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {                V oldValue = e.value;                e.value = value;                e.recordAccess(this);                return oldValue;            }        }        modCount++;        //如果走到这里，说明map中不存在键为key的entry        addEntry(hash, key, value, i);        return null;    }

    void addEntry(int hash, K key, V value, int bucketIndex) {        // 如果当前table的大小大于阈值且当前位置的桶有数据，则对map进行扩容到原来的2倍        if ((size >= threshold) && (null != table[bucketIndex])) {            resize(2 * table.length);            //扩容后重新计算key的hash值和应放置的位置            hash = (null != key) ? hash(key) : 0;            bucketIndex = indexFor(hash, table.length);        }        //根据传入的参数创建一个新的entry放置在位置bucketIndex处        createEntry(hash, key, value, bucketIndex);    }

    void createEntry(int hash, K key, V value, int bucketIndex) {        //先取出位置bucketIndex处的entry，这里是位置bucketIndex处链表的头结点        Entry<K,V> e = table[bucketIndex];        //把新建的entry放置到位置bucketIndex处作为该处链表的头结点，并把此处原来的entry设置为                 //新entry的后继节点，也就是说每插入一个新值，都会放到链表的最前端。        table[bucketIndex] = new Entry<>(hash, key, value, e);        size++;    }

    /**
     * Stores {@code value} under the {@code null} key.
     *
     * <p>The null key always lives in bucket 0. At most one null-key entry
     * exists: if one is already present its value is overwritten, otherwise a
     * new entry with hash 0 is added to bucket 0.
     *
     * @param value the value to associate with the null key
     * @return the value previously stored under the null key, or {@code null}
     *         if a new entry was created
     */
    private V putForNullKey(V value) {
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
            node = node.next;
        }

        modCount++;
        addEntry(0, null, value, 0);
        return null;
    }

阅读全文

1 0