Java BitSet

一 BitSet简介
        实现了一个按需增长的位向量。位 set 的每个组件都有一个boolean值。用非负的整数将BitSet的位编入索引。可以对每个编入索引的位进行测试、设置或者清除。通过逻辑与、逻辑或和逻辑异或操作,可以使用一个BitSet修改另一个BitSet的内容。
        默认情况下,set 中所有位的初始值都是false。每个位 set 都有一个当前大小,也就是该位 set 当前所用空间的位数。注意,这个大小与位 set 的实现有关,所以它可能随实现的不同而更改。位 set 的长度与位 set 的逻辑长度有关,并且是与实现无关而定义的。
除非另行说明,否则将 null 参数传递给BitSet中的任何方法都将导致NullPointerException。


二 基本原理

      用1位来表示一个数据是否出现过,0表示没有出现过,1表示出现过。使用的时候即可根据某一位是否为0来判断此数是否出现过。一个1G的空间,有 8*1024*1024*1024≈8.59*10^9 bit,也就是可以表示约86亿个不同的数。

三 Java中BitSet实现

      BitSet位于java.util这个包中,从jdk 1.0就引入了这个数据结构。本文参照的是jdk 7.0 源代码中的实现。




public class BitSet implements Cloneable, {    /*     * BitSets are packed into arrays of "words."  Currently a word is     * a long, which consists of 64 bits, requiring 6 address bits.     * The choice of word size is determined purely by performance concerns.     */    private final static int ADDRESS_BITS_PER_WORD = 6;    private final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;    private final static int BIT_INDEX_MASK = BITS_PER_WORD - 1;    /* Used to shift left or right for a partial word mask */    private static final long WORD_MASK = 0xffffffffffffffffL;    /**     * The internal field corresponding to the serialField "bits".     */    private long[] words;



/**     * Given a bit index, return word index containing it.     */    private static int wordIndex(int bitIndex) {        return bitIndex >> ADDRESS_BITS_PER_WORD;    }    /**     * Every public method must preserve these invariants.     */    private void checkInvariants() {        assert(wordsInUse == 0 || words[wordsInUse - 1] != 0);        assert(wordsInUse >= 0 && wordsInUse <= words.length);        assert(wordsInUse == words.length || words[wordsInUse] == 0);    }    /**     * Sets the field wordsInUse to the logical size in words of the bit set.     * WARNING:This method assumes that the number of words actually in use is     * less than or equal to the current value of wordsInUse!     */    private void recalculateWordsInUse() {        // Traverse the bitset until a used word is found        int i;        for (i = wordsInUse-1; i >= 0; i--)            if (words[i] != 0)                break;        wordsInUse = i+1; // The new logical size    }    /**     * Creates a new bit set. All bits are initially {@code false}.     */    public BitSet() {        initWords(BITS_PER_WORD);        sizeIsSticky = false;    }    /**     * Creates a bit set whose initial size is large enough to explicitly     * represent bits with indices in the range {@code 0} through     * {@code nbits-1}. All bits are initially {@code false}.     *     * @param  nbits the initial size of the bit set     * @throws NegativeArraySizeException if the specified initial size     *         is negative     */    public BitSet(int nbits) {        // nbits can't be negative; size 0 is OK        if (nbits < 0)            throw new NegativeArraySizeException("nbits < 0: " + nbits);        initWords(nbits);        sizeIsSticky = true;    }    private void initWords(int nbits) {        words = new long[wordIndex(nbits-1) + 1];    }

       我们可以看到BitSet有两个构造方法:不带参数和带参数。不带参数的构造方法默认的初始大小为 2^6 = 64 bit(可用的位索引为 0~63)。我们知道java中long的大小就是8个字节,也就是8*8=64bit。也就是说,bitset默认的是一个long整型的大小。带参数的构造方法指定了总共的bit位数,此方法会将其bit位数规整到一个大于或者等于这个数字的64的整倍数。比如64位,BitSet的大小是1个long,而65位时,则BitSet大小是2个long,即128位。做这么一个规定,主要是为了内存对齐,同时避免处理特殊情况。


/**
 * Inverts the bit at the given index, i.e. replaces it with the
 * complement of its current value.
 *
 * @param  bitIndex the index of the bit to flip
 * @throws IndexOutOfBoundsException if the specified index is negative
 * @since  1.4
 */
public void flip(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    // Make sure the word holding this bit exists before touching it.
    int wordIndex = wordIndex(bitIndex);
    expandTo(wordIndex);

    words[wordIndex] ^= (1L << bitIndex);

    // Flipping may have zeroed the highest word, so re-derive wordsInUse.
    recalculateWordsInUse();
    checkInvariants();
}

反转操作分为两步:找到对应的long;获取mask并与指定的位进行xor操作。注意对long做移位时,Java只使用移位数的低6位,所以 1L << bitIndex 等价于 1L << (bitIndex % 64),不需要手动取模。
int wordIndex = wordIndex(bitIndex);
words[wordIndex] ^= (1L << bitIndex);
在进行操作之前,执行了一个函数 expandTo(wordIndex); 这个函数是确保bitset中有对应的这个long。如果没有的话,就对bitset中的long数组进行扩展。扩展的策略,是取"当前空间的两倍"与"所需空间"两者中的较大值。

/**     * Ensures that the BitSet can hold enough words.     * @param wordsRequired the minimum acceptable number of words.     */    private void ensureCapacity(int wordsRequired) {        if (words.length < wordsRequired) {            // Allocate larger of doubled size or required size            int request = Math.max(2 * words.length, wordsRequired);            words = Arrays.copyOf(words, request);            sizeIsSticky = false;        }    }    /**     * Ensures that the BitSet can accommodate a given wordIndex,     * temporarily violating the invariants.  The caller must     * restore the invariants before returning to the user,     * possibly using recalculateWordsInUse().     * @param wordIndex the index to be accommodated.     */    private void expandTo(int wordIndex) {        int wordsRequired = wordIndex+1;        if (wordsInUse < wordsRequired) {            ensureCapacity(wordsRequired);            wordsInUse = wordsRequired;        }    }


/**     * Sets the bit at the specified index to {@code true}.     *     * @param  bitIndex a bit index     * @throws IndexOutOfBoundsException if the specified index is negative     * @since  JDK1.0     */    public void set(int bitIndex) {        if (bitIndex < 0)            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);        int wordIndex = wordIndex(bitIndex);        expandTo(wordIndex);        words[wordIndex] |= (1L << bitIndex); // Restores invariants        checkInvariants();    }



/**
 * Reports whether the bit at the given index is currently set in this
 * {@code BitSet}: {@code true} if it is, {@code false} otherwise.
 *
 * @param  bitIndex   the bit index
 * @return the value of the bit with the specified index
 * @throws IndexOutOfBoundsException if the specified index is negative
 */
public boolean get(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    checkInvariants();

    int wordIndex = wordIndex(bitIndex);
    if (wordIndex >= wordsInUse) {
        return false; // beyond the used words, so the bit is not set
    }

    long mask = 1L << bitIndex;
    return (words[wordIndex] & mask) != 0;
}



6.1  清空所有的bit位,即全部置0。通过循环方式来依次置0。

/**
 * Resets every bit in this BitSet to {@code false}.
 *
 * @since 1.4
 */
public void clear() {
    // Zero the used words from the top down until none remain in use.
    while (wordsInUse > 0) {
        wordsInUse--;
        words[wordsInUse] = 0;
    }
}

6.2 clear某一位

/**
 * Turns off (sets to {@code false}) the bit at the given index.
 *
 * @param  bitIndex the index of the bit to be cleared
 * @throws IndexOutOfBoundsException if the specified index is negative
 * @since  JDK1.0
 */
public void clear(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    int wordIndex = wordIndex(bitIndex);
    if (wordIndex >= wordsInUse) {
        return; // the bit lies beyond the used words, nothing to clear
    }

    // Mask with 0 at the target position and 1 everywhere else.
    words[wordIndex] &= ~(1L << bitIndex);

    // Clearing may have zeroed the highest word, so re-derive wordsInUse.
    recalculateWordsInUse();
    checkInvariants();
}

a. 找到对应的long。 这行语句是  int wordIndex = wordIndex(bitIndex);  
b. 操作对应的位。首先获取 mask(掩码),对于 clear某一位来说,它需要的掩码是指定位为0,其余位为1,然后与对应的long进行&运算。
   ~(1L << bitIndex);  即获取mask
  words[wordIndex] &= ~(1L << bitIndex); 执行相应的运算。

7.获取BitSet 的size

/**
 * Reports how many bits of space this {@code BitSet} actually uses to
 * represent bit values, i.e. the length of the backing array times
 * the number of bits per word.
 * The maximum element in the set is the size - 1st element.
 *
 * @return the number of bits currently in this bit set
 */
public int size() {
    int allocatedWords = words.length;
    return allocatedWords * BITS_PER_WORD;
}

8. cardinality

/**
 * Counts the bits that are set to {@code true} in this {@code BitSet}.
 *
 * @return the number of bits set to {@code true} in this {@code BitSet}
 * @since  1.4
 */
public int cardinality() {
    int total = 0;
    int w = 0;
    // Add up the population count of every word in use.
    while (w < wordsInUse) {
        total += Long.bitCount(words[w]);
        w++;
    }
    return total;
}

9. nextSetBit

/**
 * Finds the first bit set to {@code true} at or after the given
 * starting index and returns its index, or {@code -1} when there is
 * no such bit.
 *
 * <p>To iterate over the {@code true} bits in a {@code BitSet},
 * use the following loop:
 *
 *  <pre> {@code
 * for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i+1)) {
 *     // operate on index i here
 * }}</pre>
 *
 * @param  fromIndex the index to start checking from (inclusive)
 * @return the index of the next set bit, or {@code -1} if there
 *         is no such bit
 * @throws IndexOutOfBoundsException if the specified index is negative
 * @since  1.4
 */
public int nextSetBit(int fromIndex) {
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }

    checkInvariants();

    int u = wordIndex(fromIndex);
    if (u >= wordsInUse) {
        return -1;
    }

    // In the first word, ignore the bits below fromIndex.
    long word = words[u] & (WORD_MASK << fromIndex);

    // Skip forward over words that contain no set bit.
    while (word == 0) {
        if (++u == wordsInUse) {
            return -1;
        }
        word = words[u];
    }

    return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);
}

四 使用场景


下面给出一个 简单的排序示例,如下:

import java.util.BitSet;

/**
 * Small demo of {@link BitSet}: collecting the distinct characters of a
 * string, and sorting/de-duplicating an array of non-negative ints.
 */
public class BitSetTest {

    /**
     * Runs both demos with fixed sample data.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        containChars("abcdfab");
        int[] array = new int[] { 423, 700, 9999, 2323, 356, 6400, 1, 2, 3, 2, 2, 2, 2 };
        sortArray(array);
    }

    /**
     * Prints the size of the bit set used, followed by the distinct
     * chars contained in the given string, in ascending char order,
     * wrapped in square brackets.
     *
     * @param str the string whose chars are collected
     */
    public static void containChars(String str) {
        BitSet used = new BitSet();
        for (int i = 0; i < str.length(); i++) {
            used.set(str.charAt(i)); // set bit for char
        }

        StringBuilder sb = new StringBuilder();
        sb.append("[");
        int size = used.size();
        System.out.println(size);
        for (int i = 0; i < size; i++) {
            if (used.get(i)) {
                sb.append((char) i);
            }
        }
        sb.append("]");
        System.out.println(sb.toString());
    }

    /**
     * Sorts (and de-duplicates) the given values by setting one bit per
     * value and then walking the set bits in ascending order; the result
     * is printed to standard output.
     *
     * @param array the values to sort; each is used as a bit index, so
     *              a negative value makes {@link BitSet#set(int)} throw
     *              {@link IndexOutOfBoundsException}
     */
    public static void sortArray(int[] array) {
        // The set grows automatically, but it is better to pass an
        // estimated size to the constructor; the default is 64.
        BitSet bitSet = new BitSet(2 << 13);
        System.out.println("BitSet size: " + bitSet.size());

        for (int i = 0; i < array.length; i++) {
            bitSet.set(array[i]);
        }

        // Number of elements left once duplicates are removed.
        int bitLen = bitSet.cardinality();

        // Sort: copy the index of every true bit into another array.
        int[] orderedArray = new int[bitLen];
        int k = 0;
        for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
            orderedArray[k++] = i;
        }

        System.out.println("After ordering: ");
        for (int i = 0; i < bitLen; i++) {
            System.out.print(orderedArray[i] + "\t");
        }

        System.out.println("iterate over the true bits in a BitSet");
        // Alternatively, iterate over the true bits in the BitSet directly.
        for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
            System.out.print(i + "\t");
        }
        System.out.println("---------------------------");
    }
}

