Java源码阅读之String

来源:互联网 发布:淘宝上可以贷款吗 编辑:程序博客网 时间:2024/05/17 01:40

Summary:

  • String中的字符串是存储在一个char[]数组中的;hash值的结果受char数组中的值影响;
  • 构造器参数如果不是String,则都需要新建一个char[]数组;如果参数是一个char[],还会通过Arrays.copyOf()最终调用native方法System.arraycopy()完成拷贝;
  • length()、isEmpty()、charAt()等对当前String对象无任何影响的操作都是可以看成对char数组的操作
  • getChars()、toCharArray()、copyValueOf()等最终都需要调用System.arraycopy()方法;
  • 其它方法:
    • equals() {实现:两个char[]数据逐个比较}
    • replace() {实现:先拷贝char[]数据然后逐个检测替换}
    • indexOf() {实现:没有采用KMP算法,使用的是逐位置比较的穷举法}
    • split() {实现:regex为单个普通字符(或反斜杠转义的非字母数字字符)时走快速路径,利用indexOf和substring方法配合使用;其余情况交给Pattern.compile(regex).split()处理}

Fields:

// Backing array that holds the characters of this string.
private final char value[];
// Cached hash code; hashCode() recomputes it while it is still 0 and the string is non-empty.
private int hash; // Default to 0

Constructor:

//建立一个长度为0的数组public String() {        this.value = new char[0];}//改变了下成员变量的引用所指的对象而已没有新建对象public String(String original) {        this.value = original.value;        this.hash = original.hash;}//调用Arrays.copyOf方法//Arrays.copyOf方法最终调用java.lang.System.arraycopy方法;//java.lang.System.arraycopy方法声明为public static native void arraycopy();public String(char value[]) {        this.value = java.util.Arrays.copyOf(value, value.length);}    public static char[] copyOf(char[] original, int newLength) {        char[] copy = new char[newLength];        System.arraycopy(original, 0, copy, 0,                         Math.min(original.length, newLength));        return copy;    }

length():

//等同于返回数组的长度public int length() {    return value.length;}   

isEmpty():

//等同于判断数组的长度是否为0public boolean isEmpty() {        return value.length == 0;}

charAt():

//等同于数组的随机访问操作public char charAt(int index) {        if ((index < 0) || (index >= value.length)) {            throw new StringIndexOutOfBoundsException(index);        }        return value[index];}

getChars():

//调用System.arraycopy方法,参数为本String的char[]void getChars(char dst[], int dstBegin) {        System.arraycopy(value, 0, dst, dstBegin, value.length);}

toCharArray():

//调用System.arraycopy方法,参数为本String的char[]public char[] toCharArray() {        // Cannot use Arrays.copyOf because of class initialization order issues        char result[] = new char[value.length];        System.arraycopy(value, 0, result, 0, value.length);        return result;}

copyValueOf():

//调用Sring构造器public static String copyValueOf(char data[], int offset, int count) {        return new String(data, offset, count);}

equals():

//首先判断是否是同一个引用//接着判断长度是否相同//最后逐个比较char数组的字符是否相同public boolean equals(Object anObject) {        if (this == anObject) {            return true;        }        if (anObject instanceof String) {            String anotherString = (String)anObject;            int n = value.length;            if (n == anotherString.value.length) {                char v1[] = value;                char v2[] = anotherString.value;                int i = 0;                while (n-- != 0) {                    if (v1[i] != v2[i])                        return false;                    i++;                }                return true;            }        }        return false;}

hashCode():

//在返回hash之前如果hash为0且char数组长度大于0则求出hash值之后再返回hash数值public int hashCode() {        int h = hash;        if (h == 0 && value.length > 0) {            char val[] = value;                        for (int i = 0; i < value.length; i++) {                h = 31 * h + val[i];            }            hash = h;        }        return h;}

indexOf():

//该字符跟数组中的每个字符从左往右比较//lastIndexOf一样只不过是从右往左比较public int indexOf(int ch, int fromIndex) {        final int max = value.length;        if (fromIndex < 0) {            fromIndex = 0;        } else if (fromIndex >= max) {            // Note: fromIndex might be near -1>>>1.            return -1;        }        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {            // handle most cases here (ch is a BMP code point or a            // negative value (invalid code point))            final char[] value = this.value;            for (int i = fromIndex; i < max; i++) {                if (value[i] == ch) {                    return i;                }            }            return -1;        } else {            return indexOfSupplementary(ch, fromIndex);        }}//indexOf最终是调用下面的第二个static方法来进行求解的//求解步骤大概是://首先搜索到第一个字符所在的位置,之后逐个比较;//这里并没有使用kmp算法因此是一个可以优化的地方public int indexOf(String str, int fromIndex) {        return indexOf(value, 0, value.length,                str.value, 0, str.value.length, fromIndex);}static int indexOf(char[] source, int sourceOffset, int sourceCount,            char[] target, int targetOffset, int targetCount,            int fromIndex) {        if (fromIndex >= sourceCount) {            return (targetCount == 0 ? sourceCount : -1);        }        if (fromIndex < 0) {            fromIndex = 0;        }        if (targetCount == 0) {            return fromIndex;        }        char first = target[targetOffset];        int max = sourceOffset + (sourceCount - targetCount);        for (int i = sourceOffset + fromIndex; i <= max; i++) {            /* Look for first character. 
*/            if (source[i] != first) {                while (++i <= max && source[i] != first);            }            /* Found first character, now look at the rest of v2 */            if (i <= max) {                int j = i + 1;                int end = j + targetCount - 1;                for (int k = targetOffset + 1; j < end && source[j]                        == target[k]; j++, k++);                if (j == end) {                    /* Found whole string. */                    return i - sourceOffset;                }            }        }        return -1;}

contains():

//通过indexOf方法的返回值判断public boolean contains(CharSequence s) {        return indexOf(s.toString()) > -1;}

substring():

//最终是调用new String构造器,构造器参考前面的叙述public String substring(int beginIndex, int endIndex) {        if (beginIndex < 0) {            throw new StringIndexOutOfBoundsException(beginIndex);        }        if (endIndex > value.length) {            throw new StringIndexOutOfBoundsException(endIndex);        }        int subLen = endIndex - beginIndex;        if (subLen < 0) {            throw new StringIndexOutOfBoundsException(subLen);        }        return ((beginIndex == 0) && (endIndex == value.length)) ? this                : new String(value, beginIndex, subLen);}

replace():

//通过拷贝原String中的数组数据,随后对新数组更新字符//最后通过新数组构造一个String返回,原String对象等待被回收public String replace(char oldChar, char newChar) {        if (oldChar != newChar) {            int len = value.length;            int i = -1;            char[] val = value; /* avoid getfield opcode */            while (++i < len) {                if (val[i] == oldChar) {                    break;                }            }            if (i < len) {                char buf[] = new char[len];                for (int j = 0; j < i; j++) {                    buf[j] = val[j];                }                while (i < len) {                    char c = val[i];                    buf[i] = (c == oldChar) ? newChar : c;                    i++;                }                return new String(buf, true);            }        }        return this;}

split():

//根据regex数据来将String划分成多个子串//limit为-1则进行进行任意次比较,//limit为0则进行进行任意次比较,但是会将最后长度为0的空串删除//limit大于0则最进行最多limit-1次比较,返回子串个数不超过n;public String[] split(String regex, int limit) {        /* fastpath if the regex is a         (1)one-char String and this character is not one of the            RegEx's meta characters ".$|()[{^?*+\\", or         (2)two-char String and the first char is the backslash and            the second is not the ascii digit or ascii letter.         */        char ch = 0;        if (    (                    //regex长度为1,且这个字符不是正则表达式中的某个符号                    //或者,长度为2且第一个字符还是'\'且第二个字符不是asci不是字母也不是数字                    //(这里判断是否在某个区间的方法采用或运算,(data-low)|(data-high)<0则不在该范围内,这里或运算相当于是取最小值)                    (regex.value.length == 1 &&".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||                    (    regex.length() == 2 &&                         regex.charAt(0) == '\\' &&                        (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&                        ((ch-'a')|('z'-ch)) < 0 &&                        ((ch-'A')|('Z'-ch)) < 0                    )                ) &&                (                    //ch字符不在Character.MIN_HIGH_SURROGATE和Character.MAX_LOW_SURROGATE之间                    ch < Character.MIN_HIGH_SURROGATE ||                    ch > Character.MAX_LOW_SURROGATE                )           )        {            int off = 0;            int next = 0;            boolean limited = limit > 0;            ArrayList<String> list = new ArrayList<>();            while ((next = indexOf(ch, off)) != -1) {                if (!limited || list.size() < limit - 1) {                    list.add(substring(off, next));                    off = next + 1;                } else {    // last one                    //assert (list.size() == limit - 1);                    list.add(substring(off, value.length));                    off = value.length;                    break;                }            }            // If no match was found, return this   
         if (off == 0)                return new String[]{this};            // Add remaining segment            if (!limited || list.size() < limit)                list.add(substring(off, value.length));            // Construct result            int resultSize = list.size();            if (limit == 0) {                while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {                    resultSize--;                }            }            String[] result = new String[resultSize];            return list.subList(0, resultSize).toArray(result);        }        return Pattern.compile(regex).split(this, limit);}


1 0