OkHttp 源码分析：HttpUrl.parse(url) 及其内部调用的 HttpUrl.Builder.parse(null, url) 方法详细分析

来源：互联网发布：知乎日报知乎好问题编辑：程序博客网时间：2024/06/06 14:41

在使用 okhttp3 时，以下写法具体对 url 进行了怎样的处理？查了许多资料都没有发现相关介绍，于是直接查看源码进行分析，并补充相关方法的说明：Request request = new Request.Builder().url(" https://www.bejson.com/mam/app/download?path=/data/web/file/提莫/新y%u123+-*/ @##$提莫.txt").get().build(); 建议 input 传入已经编码后的 url：之后解析 url 时，"+" 默认被视为已经编码过的字符（后面的方法会提到）。
最后结论是：不会对 "+" 号进行编码；"#" 号之后的内容（fragment）也不会像 query 那样对非 ASCII 字符进行编码。原因是在此例中，框架把第一个 "#" 当作 query 和 fragment 的分隔符（截取点）。
如果 "#" 在你的 url 中有特殊意义（例如它本身是参数值的一部分），需要在传入之前自行处理（例如先编码为 "%23"），可以参考 url 的划分规范以及每个字段的含义。
具体可以查看下文对源码的分析。

// https://www.bejson.com/mam/app/download?path=/data/web/file/提莫/新y%u123+-*/ @##$提莫.txtParseResult parse(@Nullable HttpUrl base, String input) {    // 获取input有效开始位置,过滤 case '\t': case '\n':case '\f': case '\r':case ' ':    int pos = skipLeadingAsciiWhitespace(input, 0, input.length());    // 获取input有效结束位置 case '\t': case '\n':case '\f': case '\r':case ' ':    int limit = skipTrailingAsciiWhitespace(input, pos, input.length());    // Scheme.    //  ':' 的位置,不存在或者不合法 返回-1    int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);    // 判断超文本传输协议是否合法并截取,更新有效开始位置    if (schemeDelimiterOffset != -1) {        if (input.regionMatches(true, pos, "https:", 0, 6)) {            this.scheme = "https";            pos += "https:".length();        } else if (input.regionMatches(true, pos, "http:", 0, 5)) {            this.scheme = "http";            pos += "http:".length();        } else {            return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme.        }    } else if (base != null) {        this.scheme = base.scheme;    } else {        return ParseResult.MISSING_SCHEME; // No scheme.    }    // Authority.    boolean hasUsername = false;    boolean hasPassword = false;    //  获取 / 或者 \ 的个数    int slashCount = slashCount(input, pos, limit);    if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {        // Read an authority if either:        //  * The input starts with 2 or more slashes. These follow the scheme if it exists.        //  * The input scheme exists and is different from the base URL's scheme.        //        // The structure of an authority is:        //   username:password@host:port        //        // Username, password and port are optional.        
//   [username[:password]@]host[:port]        pos += slashCount;        authority:        while (true) {            //  @/\?#  其中之一符号的位置            int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");            int c = componentDelimiterOffset != limit                    ? input.charAt(componentDelimiterOffset)                    : -1;            switch (c) {                case '@'://: 截取用户名密码  目前有敏感信息都使用post 可以不用考虑                    // User info precedes.                    if (!hasPassword) {                        //获取:的位置                        int passwordColonOffset = delimiterOffset(                                input, pos, componentDelimiterOffset, ':');                        String canonicalUsername = canonicalize(                                input, pos, passwordColonOffset, USERNAME_ENCODE_SET,                                true, false, false, true, null);                        this.encodedUsername = hasUsername                                ? 
this.encodedUsername + "%40" + canonicalUsername                                : canonicalUsername;                        if (passwordColonOffset != componentDelimiterOffset) {                            hasPassword = true;                            this.encodedPassword = canonicalize(                                    input, passwordColonOffset + 1, componentDelimiterOffset,                                    PASSWORD_ENCODE_SET, true, false,                                    false, true, null);                        }                        hasUsername = true;                    } else {                        this.encodedPassword = this.encodedPassword + "%40" + canonicalize(                                input, pos, componentDelimiterOffset,                                PASSWORD_ENCODE_SET, true, false,                                false, true, null);                    }                    pos = componentDelimiterOffset + 1;                    break;                case -1:                case '/':                case '\\':                case '?':                case '#':                    //截取 host port                    // Host info precedes.                    int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);                    if (portColonOffset + 1 < componentDelimiterOffset) {                        this.host = canonicalizeHost(input, pos, portColonOffset);                        this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);                        if (this.port == -1)                            return ParseResult.INVALID_PORT; // Invalid port.                    } else {                        this.host = canonicalizeHost(input, pos, portColonOffset);                        this.port = defaultPort(this.scheme);                    }                    if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host.                    
// 移动位置                    pos = componentDelimiterOffset;                    break authority;            }        }    } else {        // This is a relative link. Copy over all authority components. Also maybe the path & query.        this.encodedUsername = base.encodedUsername();        this.encodedPassword = base.encodedPassword();        this.host = base.host;        this.port = base.port;        this.encodedPathSegments.clear();        this.encodedPathSegments.addAll(base.encodedPathSegments());        if (pos == limit || input.charAt(pos) == '#') {            encodedQuery(base.encodedQuery());        }    }        //mam/app/download?path=/data/web/file/提莫/新y%u123+-*/ @##$提莫.txt    // Resolve the relative path.    int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");    resolvePath(input, pos, pathDelimiterOffset);    pos = pathDelimiterOffset;    // Query.
//此时截取的是"?" 和"#"之间的内容会进行编码处理但是不会对"+"编码具体看此方法canonicalize()后面会进行介绍// ?path=/data/web/file/提莫/新y%u123+-*/ @#
        if (pos < limit && input.charAt(pos) == '?') {        int queryDelimiterOffset = delimiterOffset(input, pos, limit, '#');        this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(                input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true,                false, true, true, null));        pos = queryDelimiterOffset;    }    // Fragment.    //#$提莫.txt  获取到"#"号之后的内容但是不会进行编码处理    if (pos < limit && input.charAt(pos) == '#') {        this.encodedFragment = canonicalize(                input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false, null);    }    return ParseResult.SUCCESS;}

以下两个方法负责判断字符是否需要编码。对于 "+"：只有当 plusIsSpace = true 并且 alreadyEncoded = false 时，才会把 "+" 编码为 "%2B"。

/* @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. * @param strict         true to encode '%' if it is not the prefix of a valid percent encoding. * @param plusIsSpace    true to encode '+' as "%2B" if it is not already encoded. * @param asciiOnly      true to encode all non-ASCII codepoints. * @param charset        which charset to use, null equals UTF-8. */static String canonicalize(String input, int pos, int limit, String encodeSet,                           boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,                           Charset charset) {    int codePoint;    for (int i = pos; i < limit; i += Character.charCount(codePoint)) {        codePoint = input.codePointAt(i);        if (codePoint < 0x20                || codePoint == 0x7f                || codePoint >= 0x80 && asciiOnly                || encodeSet.indexOf(codePoint) != -1                || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))                || codePoint == '+' && plusIsSpace) {            // Slow path: the character at i requires encoding!            Buffer out = new Buffer();            out.writeUtf8(input, pos, i);            canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,                    asciiOnly, charset);            return out.readUtf8();        }    }    // Fast path: no characters in [pos..limit) required encoding.    return input.substring(pos, limit);}


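由于 parse() 传入的 alreadyEncoded 为 true，下面的方法不会对 "+" 进行编码（原样输出），并且会跳过 \t、\n、\f、\r；其他需要编码的字符（控制字符、0x7f、encodeSet 中的字符，以及 asciiOnly 为 true 时的非 ASCII 字符）仍会被百分号编码，其余字符原样复制。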
static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,                         boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,                         Charset charset) {    Buffer encodedCharBuffer = null; // Lazily allocated.    int codePoint;    for (int i = pos; i < limit; i += Character.charCount(codePoint)) {        codePoint = input.codePointAt(i);        if (alreadyEncoded                && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {            // Skip this character.        } else if (codePoint == '+' && plusIsSpace) {            // Encode '+' as '%2B' since we permit ' ' to be            // encoded as either '+' or '%20'.            out.writeUtf8(alreadyEncoded ? "+" : "%2B");        } else if (codePoint < 0x20                || codePoint == 0x7f                || codePoint >= 0x80 && asciiOnly                || encodeSet.indexOf(codePoint) != -1                || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {            // Percent encode this character.            if (encodedCharBuffer == null) {                encodedCharBuffer = new Buffer();            }            if (charset == null || charset.equals(Util.UTF_8)) {                encodedCharBuffer.writeUtf8CodePoint(codePoint);            } else {                encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset);            }            while (!encodedCharBuffer.exhausted()) {                int b = encodedCharBuffer.readByte() & 0xff;                out.writeByte('%');                out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);                out.writeByte(HEX_DIGITS[b & 0xf]);            }        } else {            // This character doesn't need encoding. Just copy it over.            out.writeUtf8CodePoint(codePoint);        }    }}

阅读全文

0 0