JAVA版PageRank查询代码

来源:互联网 发布:淘宝店买家晒图福利 编辑:程序博客网 时间:2024/06/04 21:46

完整的查询代码由以下两段组成。第一段是 Bob Jenkins 哈希算法的实现(JenkinsHash):

/** *  */package com.Experiment.ThemeIdentify.PageRank;/** * cee.open.pagerank.JenkinsHash.java *  * This is a Bob Jenkins hashing algorithm implementation *  * These are functions for producing 32-bit hashes for hash table lookup. * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() * are externally useful functions.  Routines to test the hash are included * if SELF_TEST is defined.  You can use this free for any purpose.  It's in * the public domain.  It has no warranty. * * @version $Revision:$ *          $Author:$ */public class JenkinsHash {    // max value to limit it to 4 bytes    private static final long MAX_VALUE = 0xFFFFFFFFL;    // internal variables used in the various calculations    long a;    long b;    long c;    /**     * Convert a byte into a long value without making it negative.     * @param b     * @return     */    private long byteToLong(byte b) {        long val = b & 0x7F;        if ((b & 0x80) != 0) {            val += 128;        }        return val;    }    /**     * Do addition and turn into 4 bytes.     * @param val     * @param add     * @return     */    private long add(long val, long add) {        return (val + add) & MAX_VALUE;    }    /**     * Do subtraction and turn into 4 bytes.     * @param val     * @param subtract     * @return     */    private long subtract(long val, long subtract) {        return (val - subtract) & MAX_VALUE;    }    /**     * Left shift val by shift bits and turn in 4 bytes.     * @param val     * @param xor     * @return     */    private long xor(long val, long xor) {        return (val ^ xor) & MAX_VALUE;    }    /**     * Left shift val by shift bits.  Cut down to 4 bytes.     * @param val     * @param shift     * @return     */    private long leftShift(long val, int shift) {        return (val << shift) & MAX_VALUE;    }    /**     * Convert 4 bytes from the buffer at offset into a long value.     
* @param bytes     * @param offset     * @return     */    private long fourByteToLong(byte[] bytes, int offset) {        return (byteToLong(bytes[offset + 0])                + (byteToLong(bytes[offset + 1]) << 8)                + (byteToLong(bytes[offset + 2]) << 16)                + (byteToLong(bytes[offset + 3]) << 24));    }    /**     * Mix up the values in the hash function.     */    private void hashMix() {        a = subtract(a, b);        a = subtract(a, c);        a = xor(a, c >> 13);        b = subtract(b, c);        b = subtract(b, a);        b = xor(b, leftShift(a, 8));        c = subtract(c, a);        c = subtract(c, b);        c = xor(c, (b >> 13));        a = subtract(a, b);        a = subtract(a, c);        a = xor(a, (c >> 12));        b = subtract(b, c);        b = subtract(b, a);        b = xor(b, leftShift(a, 16));        c = subtract(c, a);        c = subtract(c, b);        c = xor(c, (b >> 5));        a = subtract(a, b);        a = subtract(a, c);        a = xor(a, (c >> 3));        b = subtract(b, c);        b = subtract(b, a);        b = xor(b, leftShift(a, 10));        c = subtract(c, a);        c = subtract(c, b);        c = xor(c, (b >> 15));    }    /**     * Hash a variable-length key into a 32-bit value.  Every bit of the     * key affects every bit of the return value.  Every 1-bit and 2-bit     * delta achieves avalanche.  The best hash table sizes are powers of 2.     *     * @param buffer       Byte array that we are hashing on.     * @param initialValue Initial value of the hash if we are continuing from     *                     a previous run.  0 if none.     * @return Hash value for the buffer.     
*/    public long hash(byte[] buffer, long initialValue) {        int len, pos;        // set up the internal state        // the golden ratio; an arbitrary value        a = 0x09e3779b9L;        // the golden ratio; an arbitrary value        b = 0x09e3779b9L;        // the previous hash value        c = 0x0E6359A60L;        // handle most of the key        pos = 0;        for (len = buffer.length; len >= 12; len -= 12) {            a = add(a, fourByteToLong(buffer, pos));            b = add(b, fourByteToLong(buffer, pos + 4));            c = add(c, fourByteToLong(buffer, pos + 8));            hashMix();            pos += 12;        }        c += buffer.length;        // all the case statements fall through to the next on purpose        switch (len) {            case 11:                c = add(c, leftShift(byteToLong(buffer[pos + 10]), 24));            case 10:                c = add(c, leftShift(byteToLong(buffer[pos + 9]), 16));            case 9:                c = add(c, leftShift(byteToLong(buffer[pos + 8]), 8));                // the first byte of c is reserved for the length            case 8:                b = add(b, leftShift(byteToLong(buffer[pos + 7]), 24));            case 7:                b = add(b, leftShift(byteToLong(buffer[pos + 6]), 16));            case 6:                b = add(b, leftShift(byteToLong(buffer[pos + 5]), 8));            case 5:                b = add(b, byteToLong(buffer[pos + 4]));            case 4:                a = add(a, leftShift(byteToLong(buffer[pos + 3]), 24));            case 3:                a = add(a, leftShift(byteToLong(buffer[pos + 2]), 16));            case 2:                a = add(a, leftShift(byteToLong(buffer[pos + 1]), 8));            case 1:                a = add(a, byteToLong(buffer[pos + 0]));                // case 0: nothing left to add        }        hashMix();        return c;    }    /**     * See hash(byte[] buffer, long initialValue)     *     * @param buffer Byte array that we are hashing on.   
  * @return Hash value for the buffer.     */    public long hash(byte[] buffer) {        return hash(buffer, 0);    }}

第二段是调用 Google 工具栏接口查询 PageRank 的代码(GooglePageRank):

/** *  */package com.Experiment.ThemeIdentify.PageRank;import java.net.URL;import java.net.URLConnection;import org.apache.commons.io.IOUtils;import org.apache.commons.lang.StringUtils;import org.apache.commons.lang.math.NumberUtils;import com.Experiment.ThemeIdentify.PageRank.JenkinsHash;/** * cee.open.pagerank.GooglePageRank.java *  * PageRankService provides simple API to Google PageRank Technology *  * PageRankService queries google toolbar webservice and returns a google page * rank retrieved from one of the next datacenters on the list. * toolbarqueries.google.com * * @version $Revision:$ *          $Author:$ */public class GooglePageRank {/** * List of available google datacenter IPs and addresses */static final public String GOOGLE_PR_DATACENTER_IP = "toolbarqueries.google.com";//static final public String GOOGLE_PR_DATACENTER_IP = "www.google.com";/** * Must receive a domain in form of: "http://www.domain.com" *  * @param domain  * @return PR rating (int) or -1 if unavailable or internal error happened. */public int getPageRank(String domain) {JenkinsHash jHash = new JenkinsHash();long hash = jHash.hash(("info:" + domain).getBytes());String url = "http://" + GOOGLE_PR_DATACENTER_IP+ "/tbr?client=navclient-auto&hl=en&" + "ch=6" + hash+ "&ie=UTF-8&oe=UTF-8&features=Rank&q=info:" + domain;try {URLConnection conn = new URL(url).openConnection();String pageRankResponse = IOUtils.toString(conn.getInputStream());if (StringUtils.isNotBlank(pageRankResponse)) {return NumberUtils.toInt(pageRankResponse.split(":")[2].trim());}} catch (Exception e) {e.printStackTrace();}return -1;}/** * @param args */public static void main(String[] args) {GooglePageRank prService = new GooglePageRank();System.out.println("PageRank: " + prService.getPageRank("http://www.baidu.com"));}}
原创粉丝点击