Replacing the Redis HLL with an in-memory HLL

/**
 * In-memory HLL implementation; the intermediate-state backend can be switched
 * between HBaseHandler and RedisHandler.
 * 
 * @author mosi.li
 * 
 */
// NOTE: the superclass name is missing from the original listing; it is presumably a
// project-specific bolt base class that supplies the prepare/execute hooks overridden below.
public class AggUvMemHll2HbaseBolt extends /* base class omitted in the original */ {
private final static Logger LOG = LoggerFactory
.getLogger(AggUvMemHll2HbaseBolt.class);
private Map<byte[], HyperLogLog> cidBufHllMem = new HashMap<byte[], HyperLogLog>();// main in-memory cache (byte[] HashMap keys compare by reference; see doMemUV)
private Map<byte[], HyperLogLog> pendingCache = new HashMap<byte[], HyperLogLog>();// buffers updates that arrive while the main cache is being flushed
private volatile boolean isDoingHbaseState = false;// true while the persister thread is flushing (volatile: shared with the bolt thread)
private volatile boolean isTransf = false;// true once a flush has finished and pendingCache must be merged back


// List<Get> gets = new ArrayList<Get>();// helper for locating keys quickly


@Override
protected void execute(Tuple input) {
String key = input.getStringByField(Constant.AGGREGATE_UV_FIELD);
String cookie_id = input.getStringByField(Constant.COOKIE_ID);
if (isTransf) {
pending2Root();
isTransf = false;
}
if (!isDoingHbaseState)
doMemUV(key, cookie_id, cidBufHllMem);// HBase idle: no flush in progress
else
doMemUV(key, cookie_id, pendingCache);// a flush is in progress: buffer here and merge back afterwards


}


private void doMemUV(String key, String cookie_id,
Map<byte[], HyperLogLog> memCache) {
// NOTE: the original looked the entry up with the String key, which can never match a
// Map<byte[], ...>; even with getBytes(), byte[] HashMap keys compare by reference, so in
// practice every tuple created a fresh estimator (see the sketch after this method).
HyperLogLog hyperLogLog = memCache.get(key.getBytes());
if (hyperLogLog == null) {
hyperLogLog = new HyperLogLog(16);
}
hyperLogLog.offer(cookie_id);// offer() is O(1) per element
memCache.put(key.getBytes(), hyperLogLog);
// if (!memCache.containsKey(key.getBytes())) {
// Get get = new Get(key.getBytes());
// gets.add(get);
// }
}
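
// Sketch (not in the original post): one assumed way to make the lookup above actually hit is
// to key the cache by java.nio.ByteBuffer, whose equals()/hashCode() compare content. The
// field and method names below (uvCacheByBuffer, doMemUVByBuffer) are hypothetical.
private Map<ByteBuffer, HyperLogLog> uvCacheByBuffer = new HashMap<ByteBuffer, HyperLogLog>();

private void doMemUVByBuffer(String key, String cookie_id) {
ByteBuffer k = ByteBuffer.wrap(key.getBytes());// content-based key, so repeated keys are found
HyperLogLog hll = uvCacheByBuffer.get(k);
if (hll == null) {
hll = new HyperLogLog(16);// same log2m precision as the original caches
uvCacheByBuffer.put(k, hll);
}
hll.offer(cookie_id);
}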


/**
* Move the pending data back into the main cache.<br/>
* Possible optimization: swap the two map references instead of copying entry by entry, at the cost of a bit more complexity; run this version first and measure (see the sketch after this method).
*/
private void pending2Root() {
for (byte[] key : pendingCache.keySet())
cidBufHllMem.put(key, pendingCache.get(key));
pendingCache.clear();
}
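
// Sketch of the "swap the two references" optimization mentioned in the javadoc above
// (assumption, not in the original post; the method name pending2RootBySwap is illustrative,
// and it carries the same thread-safety caveats as the copy-based version).
private void pending2RootBySwap() {
Map<byte[], HyperLogLog> drained = cidBufHllMem;// already cleared by the persister thread
cidBufHllMem = pendingCache;// the pending data simply becomes the new main cache
pendingCache = drained;// the emptied map is reused as the next pending buffer
}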






@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
super.prepare(stormConf, context, collector);
new Thread(new HBaseHandler()).start();
// new Thread(new RedisHandler()).start();
}


// ---------------------------------------------------------------------------
// ----------------------- intermediate HLL state stored in Redis -------------------------
// ---------------------------------------------------------------------------
private class RedisHandler extends Hll2HBaseHandler<byte[]> {


@Override
protected boolean isHbaseState() {
return false;
}


@Override
public HashMap<byte[], HyperLogLog> multiGet() throws IOException {
initRedisConnection();
// 1: queue a pipelined GET for every key currently in the main cache
HashMap<byte[], Response<byte[]>> newMap = new HashMap<byte[], Response<byte[]>>();
for (byte[] key : cidBufHllMem.keySet()) {
newMap.put(key, pipeline.get(key));
}
pipeline.sync();
// 2: read the buffered responses and deserialize them into HyperLogLog objects
for (byte[] key : newMap.keySet()) {
Response<byte[]> rsp = newMap.get(key);// the original re-issued pipeline.get(key) here, after sync(), which never yields data
if (rsp == null)
continue;
byte[] value = rsp.get();
HyperLogLog hyperLogLog = HyperLogLog.Builder.build(value);
result.put(key, hyperLogLog);
}
return result;
}


@Override
protected void mutiPut() {
for (byte[] key : temResult.keySet()) {
pipeline.set(key, temResult.get(key));
}
pipeline.sync();
temResult.clear();
}


@Override
protected void prepareMutiPut(byte[] key, byte[] bytes) {
temResult.put(key, bytes);
}


private void initRedisConnection() {
if (jedis == null || !jedis.isConnected() || pipeline == null) {
try {
jedis = new Jedis(hostname, port, timeout);
jedis.connect();
pipeline = jedis.pipelined();
} catch (Exception e) {
LOG.error("redis connection fail......" + e.getMessage());
}
}
}


HashMap<byte[], byte[]> temResult = new HashMap<byte[], byte[]>();// serialized HLLs queued for the next pipelined SET
String hostname;// Redis connection settings; never assigned in the original listing (see the sketch below)
int port;
int timeout;
Jedis jedis;// created lazily in initRedisConnection(); the original eagerly built one from the still-unset fields above
Pipeline pipeline;
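
// Sketch (assumption, not in the original): the connection settings above presumably come from
// the topology configuration. The method name and config keys below ("redis.host", "redis.port",
// "redis.timeout") are hypothetical.
private void configure(Map stormConf) {
hostname = (String) stormConf.get("redis.host");
port = ((Number) stormConf.get("redis.port")).intValue();
timeout = ((Number) stormConf.get("redis.timeout")).intValue();
}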


}


// ---------------------------------------------------------------------------
// ----------------------- intermediate HLL state stored in HBase -------------------------
// ---------------------------------------------------------------------------
private class HBaseHandler extends Hll2HBaseHandler<Get> {
@Override
public HashMap<byte[], HyperLogLog> multiGet() throws IOException {
gets.clear();// NOTE: the original never cleared this list, so Gets accumulated across flush cycles
for (byte[] key : cidBufHllMem.keySet())
gets.add(new Get(key));
Result[] results = htable.get(gets);
// deserialize the stored HLL for every row that already exists
for (Result _res : results) {
if (_res == null || _res.isEmpty())// the original's inverted check (!isEmpty) skipped exactly the useful rows
continue;
byte[] key = _res.getRow();
byte[] value = _res.getValue(columnFamily.getBytes(),
qualifier.getBytes());
HyperLogLog hyperLogLog = HyperLogLog.Builder.build(value);
result.put(key, hyperLogLog);
}
return result;


}


@Override
protected boolean isHbaseState() {
return true;
}


@Override
protected void mutiPut() {
// no-op: when the intermediate state lives in HBase there is nothing extra to write
}


@Override
protected void prepareMutiPut(byte[] key, byte[] bytes) {
// no-op: when the intermediate state lives in HBase there is nothing extra to prepare
}
}


// ---------------------------------------------------------------------------------
// ------ periodic persistence: flush the HLL results to HBase; the uv column feeds the front end ------
// ---------------------------------------------------------------------------------


private abstract class Hll2HBaseHandler<T> implements Runnable {


@Override
public void run() {
while (true) {
long now = System.currentTimeMillis();
// if this can't keep up, offload the flush work to yet another thread
if (now - lastUpdateTime > interval
|| cidBufHllMem.size() > writeSize) {
isDoingHbaseState = true;
doHll2HBase();
lastUpdateTime = now;
cidBufHllMem.clear();
// gets.clear();
puts.clear();
result.clear();
isDoingHbaseState = false;
isTransf = true;
}
try {
Thread.sleep(100);// NOTE: the original loop spun without pausing; 100 ms is an assumed back-off
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
}


private void doHll2HBase() {
try {
htable = HConnectionPool.getHConnection().getTable(htableName);
execute();
} catch (Exception e) {
LOG.error("Aggregate uv by hbase hll failed. ", e);
e.printStackTrace();
} finally {
try {
if (htable != null)// guard against a failed getTable() leaving htable null
htable.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}


private void execute() throws IOException, CardinalityMergeException {
multiGet();
for (byte[] key : cidBufHllMem.keySet())
doHllMerge(key);
doHllPersist();
}


// merge with the previously persisted HLL so that counts survive an abnormal JVM shutdown
private void doHllMerge(byte[] key) throws IOException,
CardinalityMergeException {
HyperLogLog memHll = cidBufHllMem.get(key);
HyperLogLog hllInRedis = result.get(key);// previously persisted state (Redis or HBase)
HyperLogLog mergeHll;
if (hllInRedis == null) {
mergeHll = memHll;// no persisted state yet: write the in-memory HLL as-is (the original returned here, so brand-new keys were never persisted)
} else if (memHll.cardinality() == hllInRedis.cardinality()) {// O(1) comparison
return;// nothing new since the last flush
} else {
mergeHll = (HyperLogLog) memHll.merge(hllInRedis);
}
Put put = new Put(key);
if (isHbaseState())
put.add(columnFamily.getBytes(), qualifier.getBytes(),
mergeHll.getBytes());
else
prepareMutiPut(key, mergeHll.getBytes());
put.add(columnFamily.getBytes(), uv.getBytes(),
String.valueOf(mergeHll.cardinality()).getBytes());
puts.add(put);
}
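
// Sketch (assumption, not in the original): what merge() provides here, using the same
// stream-lib API as above; the method name mergeSemanticsDemo is illustrative only.
private long mergeSemanticsDemo() throws CardinalityMergeException {
HyperLogLog a = new HyperLogLog(16);
a.offer("u1");
a.offer("u2");
HyperLogLog b = new HyperLogLog(16);
b.offer("u2");
b.offer("u3");
HyperLogLog union = (HyperLogLog) a.merge(b);// estimates the cardinality of the union of both sets
return union.cardinality();// approximately 3: the element seen by both estimators is counted once
}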


private void doHllPersist() throws IOException {
htable.put(puts);
if (!isHbaseState())
mutiPut();


}


protected abstract boolean isHbaseState();


public abstract HashMap<byte[], HyperLogLog> multiGet()
throws IOException;


protected abstract void mutiPut();


protected abstract void prepareMutiPut(byte[] key, byte[] bytes);


// assorted helper fields and settings
HTableInterface htable = null;
List<Put> puts = new ArrayList<Put>();
List<T> gets = new ArrayList<T>();
HashMap<byte[], HyperLogLog> result = new HashMap<byte[], HyperLogLog>();
long lastUpdateTime = System.currentTimeMillis();
private int interval = 1000;// flush interval in milliseconds
private int writeSize = 50;// flush early once the main cache holds this many keys
protected String htableName = "myTrfcstatTest";
protected String columnFamily = Constant.PLTF_HTABLE_CF;
protected String qualifier = "uvHll";// stores the serialized HLL bytes
protected String uv = "uv";// the computed UV value, persisted for the reporting team's front end


}
}
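
For context, here is a minimal sketch of how a bolt like this could be submitted as part of a Storm topology. The launcher class, the spout (LogSpout), the component names, parallelism hints, and the topology name below are assumptions for illustration and do not come from the original post.

// Hypothetical launcher, for illustration only.
public class UvTopologyLauncher {
public static void main(String[] args) throws Exception {
TopologyBuilder builder = new TopologyBuilder();
// hypothetical spout emitting tuples carrying Constant.AGGREGATE_UV_FIELD and Constant.COOKIE_ID
builder.setSpout("log-spout", new LogSpout(), 2);
// field grouping on the aggregation key keeps all cookies for one key on the same bolt instance
builder.setBolt("agg-uv-hll", new AggUvMemHll2HbaseBolt(), 4)
.fieldsGrouping("log-spout", new Fields(Constant.AGGREGATE_UV_FIELD));
StormSubmitter.submitTopology("uv-hll-topology", new Config(), builder.createTopology());
}
}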