java 基数估值
来源:互联网 发布:手机怎么淘宝购物 编辑:程序博客网 时间:2024/04/29 14:59
通过基数估值的方法来得到大量数据中重复的列。
算法步骤:随机生成大量数据,利用 MurmurHash 得到 32 位的 hash 值,再通过 2 的 10 次方(即 1024)个分桶来进行统计计算。
package test;import java.util.HashMap;import java.util.Iterator;import java.util.Random; class jishuguzhi { public static void main(String args[]) { engine en=new engine(); en.jisuan(); } } class engine { HashMap<Integer,Integer> map=new HashMap<Integer, Integer>();MurmurHash m=new MurmurHash();public long gethash(byte [] data,int length,int seed){return m.hash32(data, length, seed); }public void jisuan(){//分桶数为pow(2,10);long top=100000000000l;while(top-->=0){Random r=new Random();long x=r.nextLong();byte[]data= String.valueOf(x).getBytes();int length=data.length;long iwant=this.gethash(data,length ,33333333);//获得二进制字符串。 String iwantyou=Long.toBinaryString(iwant); int distance=iwantyou.length()-10; String tongn; if(distance>=0) { tongn=iwantyou.substring(0, 10-1); } else { tongn=iwantyou.substring(0, iwantyou.length()-1); } //位数不够就在后面加0; int shi=Integer.valueOf(tongn,2); System.out.println(tongn); if(map.containsKey(shi)) { this.map.put(Integer.valueOf(tongn), this.map.get(shi)+1); } else { this.map.put(Integer.valueOf(tongn), 1); }}//迭代出map的值。Iterator it = map.keySet().iterator();while(it.hasNext()){Integer key = (Integer) it.next();int value = map.get(key);System.out.println(key + "→" + value);}} } final class MurmurHash { private static final long serialVersionUID = 4342869264396184799L; // all methods static; private constructor. public MurmurHash() { } protected byte[] toBytesWithoutEncoding(String str) { int len = str.length(); int pos = 0; byte[] buf = new byte[len << 1]; for (int i = 0; i < len; i++) { char c = str.charAt(i); buf[pos++] = (byte) (c & 0xFF); buf[pos++] = (byte) (c >> 8); } return buf; } public int hashcode(String str) { byte[] bytes = toBytesWithoutEncoding(str); return hash32(bytes, bytes.length); } /** * Generates 32 bit hash from byte array of the given length and * seed. 
* * @param data byte array to hash * @param length length of the array to hash * @param seed initial seed value * @return 32 bit hash of the given array */public int hash32( final byte[] data, int length, int seed) { // 'm' and 'r' are mixing constants generated offline.// They're not really 'magic', they just happen to work well.final int m = 0x5bd1e995;final int r = 24; // Initialize the hash to a random valueint h = seed^length;int length4 = length/4; for (int i=0; i<length4; i++) { final int i4 = i*4;int k = (data[i4+0]&0xff) +((data[i4+1]&0xff)<<8)+((data[i4+2]&0xff)<<16) +((data[i4+3]&0xff)<<24);k *= m;k ^= k >>> r;k *= m;h *= m;h ^= k; } // Handle the last few bytes of the input arrayswitch (length%4) { case 3: h ^= (data[(length&~3) +2]&0xff) << 16;case 2: h ^= (data[(length&~3) +1]&0xff) << 8;case 1: h ^= (data[length&~3]&0xff);h *= m; } h ^= h >>> 13;h *= m;h ^= h >>> 15; return h; } /** * Generates 32 bit hash from byte array with default seed value. * * @param data byte array to hash * @param length length of the array to hash * @return 32 bit hash of the given array */public int hash32( final byte[] data, int length) { return hash32( data, length, 0x9747b28c); } /** * Generates 64 bit hash from byte array of the given length and seed. 
* * @param data byte array to hash * @param length length of the array to hash * @param seed initial seed value * @return 64 bit hash of the given array */public long hash64( final byte[] data, int length, int seed) { final long m = 0xc6a4a7935bd1e995L;final int r = 47; long h = (seed&0xffffffffl)^(length*m); int length8 = length/8; for (int i=0; i<length8; i++) { final int i8 = i*8;long k = ((long)data[i8+0]&0xff) +(((long)data[i8+1]&0xff)<<8)+(((long)data[i8+2]&0xff)<<16) +(((long)data[i8+3]&0xff)<<24)+(((long)data[i8+4]&0xff)<<32) +(((long)data[i8+5]&0xff)<<40)+(((long)data[i8+6]&0xff)<<48) +(((long)data[i8+7]&0xff)<<56); k *= m;k ^= k >>> r;k *= m; h ^= k;h *= m; } switch (length%8) { case 7: h ^= (long)(data[(length&~7)+6]&0xff) << 48;case 6: h ^= (long)(data[(length&~7)+5]&0xff) << 40;case 5: h ^= (long)(data[(length&~7)+4]&0xff) << 32;case 4: h ^= (long)(data[(length&~7)+3]&0xff) << 24;case 3: h ^= (long)(data[(length&~7)+2]&0xff) << 16;case 2: h ^= (long)(data[(length&~7)+1]&0xff) << 8;case 1: h ^= (long)(data[length&~7]&0xff);h *= m; }; h ^= h >>> r;h *= m;h ^= h >>> r; return h; } /** * Generates 64 bit hash from byte array with default seed value. * * @param data byte array to hash * @param length length of the array to hash * @return 64 bit hash of the given string */public long hash64( final byte[] data, int length) { return hash64( data, length, 0xe17a1465); } }
0 0
- java 基数估值
- 企业估值
- 大盘估值状态
- 市场估值探讨
- TPC-C估值
- 估值与爬山
- 简单估值
- 公司估值
- 互联网公司估值
- 基数
- 二手车估值接口 返回合作伙伴的估值记录
- 计算(估值线段树)
- 优化黑白棋估值参数
- 黑白棋模板估值简介
- 银行业的长期估值,预测更正
- 银行业的长期估值(中)
- 猪哥哥说:股价估值
- 再说《目前的估值安全吗?》
- 单例模式(懒汉方式和饿汉方式)
- MVP in Android
- Hamiltonian Cycle
- Android基础-ProgressDialog用法
- 剑指offer面试题3之二维数组中的查找
- java 基数估值
- HDU-1257-最少拦截系统
- UILabel行间距及其用法
- const在c中的用法
- hdu4057 Rescue the Rabbit【AC自动机+dp滚动数组】
- 2016年蓝桥个人赛赛前总复习 个人经验总结
- LCS(最长公共子序列)
- Yahoo! Hadoop Module 1: Tutorial Introduction
- 多线程 : Java 信号量 Semaphore 使用