日志分析demo
来源:互联网 发布:电脑怎么下载淘宝 编辑:程序博客网 时间:2024/06/05 20:50
日志分析
在对日志进行分析的时候,恰巧写了两个不同的demo,记录一下:
代码块
1 初始版
@Componentpublic class LogMonitoring { @Autowired protected UnitSettingDao unitSettingDao; // 缓存unitSetting table中的 pid 和 cid; private static HashMap<String,String[]> unitPidCidMem = new HashMap<String, String[]>(); public static void main(String[] args) { long startTime =System.currentTimeMillis(); int code = 0; ClassPathXmlApplicationContext context = null; String inputPath = "/home/work/input.log"; try { Log.infoLog(">>> start to prepare env."); context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml"); Log.infoLog(">>> start to process log."); LogMonitoring logMonitoring = context.getBean(LogMonitoring.class); code = logMonitoring.unitStypeMonitor(inputPath); } catch (Exception e) { e.printStackTrace(); code = 1; } finally { context.close(); } double processTime = (System.currentTimeMillis()-startTime)/1000; Log.infoLog("Total process time is :" + processTime); System.exit(code); } /** * unitsetting监控 */ public int unitStypeMonitor(String inputFilePath) throws IOException{ FileReader fr = null; BufferedReader br = null; int value = 0 ; String line = null; String unitId; String userId; String provid; String cityid; String pid = null; String cid = null; boolean flag; Set<Long> unitIds = new HashSet<Long>(); try{ fr = new FileReader(inputFilePath); br = new BufferedReader(fr); while ((line = br.readLine()) != null) { String[] tempStr; if (StringUtils.isEmpty(line) || (tempStr = line.split("\\s+")).length < 8) { continue; } unitId = tempStr[8].split(":|,")[1]; userId = tempStr[6].split(":|,")[1]; provid = tempStr[tempStr.length-2].split(":|,")[1]; cityid = tempStr[tempStr.length-1].split(":")[1]; unitIds.add(Long.parseLong(unitId)); if(unitPidCidMem.containsKey(unitId)){ pid = unitPidCidMem.get(unitId)[0]; cid = unitPidCidMem.get(unitId)[1]; flag = compareId(provid,cityid,pid,cid); }else { Map<Long,UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(userId),unitIds); 
if(TbFeedInterestPoMap.containsKey(Long.parseLong(unitId))) { pid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getPid(); cid = TbFeedInterestPoMap.get(Long.parseLong(unitId)).getCid(); }else{ Log.infoLog("unitId id is not in databases!"); } if((StringUtils.isEmpty(pid)) && (StringUtils.isEmpty(cid))){ continue; // 不限地域 } flag = compareId(provid,cityid,pid,cid); String[] pidCid = new String[2]; pidCid[0] = pid; pidCid[1] = cid; unitPidCidMem.put(unitId,pidCid); //将pid,Cid加入缓存; try { Thread.sleep(1); //sleep 1 mills,缓解数据库压力 } catch (InterruptedException e) { e.printStackTrace(); } } if(!flag){ Log.infoLog(userId + " error! " + "line : "+ line); //误报 value = 1; } } } catch (Exception e) { throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e); } finally { try { if (br != null) { br.close(); } if (fr != null) { fr.close(); } } catch (Exception e) { e.printStackTrace(); } } return value; } /** * @param provid * @param cityid * @param pid:db中用','分割的多条pid; * @param cid:db中用','分割的多条cid; * @return */ public boolean compareId(String provid,String cityid,String pid,String cid){ if(StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)){ return false; } if(pid.equals("0") ||(cid.equals("0"))){ return true; } List<String> pidList = Arrays.asList(pid.split(",")); List<String> cidList = Arrays.asList(cid.split(",")); if(pidList.contains(provid) || cidList.contains(cityid)){ return true; } if(pid.equals("0")&&cidList.contains(cityid)){ return true; } return false; }}
2 优化版
package com.baidu.fengchao.sirius.scripts.cases;import com.baidu.fengchao.sirius.orm.dao.UnitSettingDao;import com.baidu.fengchao.sirius.orm.po.UnitSetting;import com.baidu.fengchao.sirius.scripts.Log;import org.apache.commons.lang.StringUtils;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.context.support.ClassPathXmlApplicationContext;import org.springframework.stereotype.Component;import java.io.*;import java.util.*;/** * @author liuchaoqun01 * @version 1.0 */@Componentpublic class LogMonitoring { @Autowired protected UnitSettingDao unitSettingDao; private static HashMap<Long, String[]> unitPidCidMem = new HashMap<Long, String[]>(); //缓存unitSetting table中的pid和cid; public static void main(String[] args) { long startTime = System.currentTimeMillis(); int code; ClassPathXmlApplicationContext context = null; String inputPath = "/home/work/liuchaoqun01/feedclk.log"; try { Log.infoLog(">>> start to prepare env."); context = new ClassPathXmlApplicationContext("classpath:applicationContext.xml"); Log.infoLog(">>> start to process log."); LogMonitoring logMonitoring = context.getBean(LogMonitoring.class); code = logMonitoring.unitStypeMonitor(inputPath); } catch (Exception e) { e.printStackTrace(); code = 1; } finally { context.close(); } double processTime = (System.currentTimeMillis() - startTime) / (1000 * 1.0); Log.infoLog("Total process time is :" + processTime); System.exit(code); } /** * unitsetting监控 */ public int unitStypeMonitor(String inputFilePath) throws IOException { FileReader fr = null; BufferedReader br = null; int value = 0; String line = null; String unitId; String userId; String provid; String cityid; int num = 0; int lineNumber = 0; String pid; String cid; boolean flag; HashMap<String, Set<Long>> unitIdsMap = new HashMap<String, Set<Long>>(); // 需要查库的unitIdsMap; HashMap<Integer, String> logData = new HashMap<Integer, String>(); // 保存log<num,line> HashMap<Integer, String[]> infoData = new HashMap<Integer, 
String[]>(); // 保存log<pid,cid> try { fr = new FileReader(inputFilePath); br = new BufferedReader(fr); while ((line = br.readLine()) != null) { ++lineNumber; String[] tempStr = line.split("\\s+"); if (StringUtils.isEmpty(line) || (tempStr.length < 10) || (tempStr.length > 18)) { //过滤无效log和打错的log continue; } unitId = tempStr[8].split(":|,")[1]; userId = tempStr[6].split(":|,")[1]; provid = tempStr[tempStr.length - 2].split(":|,")[1]; cityid = tempStr[tempStr.length - 1].split(":")[1]; //如果在内存中,直接比较; if (unitPidCidMem.containsKey(unitId)) { pid = unitPidCidMem.get(unitId)[0]; cid = unitPidCidMem.get(unitId)[1]; flag = compareId(provid, cityid, pid, cid); if (!flag) { Log.infoLog(userId + " error! " + "line : " + line); //误报 value = 1; } } else { // 累计,当到达100条时进行读库并更新内存; logData.put(lineNumber, line); // 镜像将要处理的log数据; String[] infoString = new String[3]; infoString[0] = provid; infoString[1] = cityid; infoString[2] = unitId; infoData.put(lineNumber, infoString); if (!unitIdsMap.containsKey(userId)) { Set<Long> unitIdSet = new HashSet<Long>(); unitIdSet.add(Long.parseLong(unitId)); unitIdsMap.put(userId, unitIdSet); } else { Set<Long> unitIdSet = unitIdsMap.get(userId); unitIdSet.add(Long.parseLong(unitId)); unitIdsMap.put(userId, unitIdSet); } ++num; if (num == 100) { // 查库&&加入内存,数据重新初始化; if(!batchProcess(unitIdsMap,logData,infoData)) { value = 1; } num = 0; unitIdsMap = new HashMap<String, Set<Long>>(); logData = new HashMap<Integer, String>(); infoData = new HashMap<Integer, String[]>(); } else { continue; } } } } catch (Exception e) { throw new RuntimeException("load file error:" + inputFilePath + ", line:" + line, e); } finally { try { if (br != null) { br.close(); } if (fr != null) { fr.close(); } } catch (Exception e) { e.printStackTrace(); } } return value; } /** * @param provid * @param cityid * @param pid:db中用','分割的多条pid; * @param cid:db中用','分割的多条cid; * @return */ public boolean compareId(String provid, String cityid, String pid, String cid) { 
if(StringUtils.isEmpty(pid) && StringUtils.isEmpty(cid)){ return true; } if (pid.equals("0") && (cid.equals("0"))) { return true; } if (StringUtils.isEmpty(pid) || StringUtils.isEmpty(cid)) { return false; } List<String> pidList = Arrays.asList(pid.split(",")); List<String> cidList = Arrays.asList(cid.split(",")); if (pidList.contains(provid) || cidList.contains(cityid)) { return true; } if (pid.equals("0") && cidList.contains(cityid)) { return true; } return false; } /** * 1 每100条进行读库操作 2 保证传过来的数据内存中是没有的,都是需要进行查库操作的; * * @param */ public boolean batchProcess(HashMap<String, Set<Long>> unitIdsMap, HashMap<Integer, String> logData, HashMap<Integer, String[]> infoData) { boolean flag = true; //标记位; boolean code; Long unitId; for (String user : unitIdsMap.keySet()) { Set<Long> unitIds = unitIdsMap.get(user); Map<Long, UnitSetting> TbFeedInterestPoMap = unitSettingDao.getByUnitids(Long.parseLong(user), unitIds); try { //sleep 1 mills,缓解数据库压力 Thread.sleep(1); } catch (InterruptedException e) { e.printStackTrace(); } for (Long unit : TbFeedInterestPoMap.keySet()) { if (!unitPidCidMem.containsKey(unit)) { String[] pidCid = new String[2]; pidCid[0] = TbFeedInterestPoMap.get(unit).getPid(); pidCid[1] = TbFeedInterestPoMap.get(unit).getCid(); unitPidCidMem.put(unit, pidCid); } } } for (Integer lineNumber : infoData.keySet()) { String[] infoStr = infoData.get(lineNumber); unitId = Long.parseLong(infoStr[2]); if(unitPidCidMem.containsKey(unitId)) { String[] pidCid = unitPidCidMem.get(unitId); code = compareId(infoStr[0], infoStr[1], pidCid[0], pidCid[1]); if (!code) { Log.infoLog( "lineNumber:"+ lineNumber + "error!" +" :" + logData.get(lineNumber)); //误报 flag = false; } }else { Log.infoLog("There is bug in code : com.baidu.fengchao.sirius.scripts.cases.LogMonitoring"); } } return flag; }}
3 小结:
数据:145M
方法1:运行时间1.4h
方法2:运行时间30min
共同点:两种方法在比较时都使用了缓存。不同点:方法2依次读取日志,当缓存未命中的记录累计达到100条后,统一查库并更新缓存;方法1则在每次缓存未命中时立即访问一次数据库,存在一定的局限性,大量的数据库访问会带来较高的耗时。
阅读全文
0 0
- 日志分析demo
- 日志采集分析项目Demo
- iis日志分析和tomcat日志分析(python)demo
- Spark日志分析项目Demo(1)--Flume-ng的安装
- Spark日志分析项目Demo(3)--Spark入口和DataFrame
- Spark日志分析项目Demo(5)--自定义Accumulator
- Spark日志分析项目Demo(9)--常规性能调优
- Spark日志分析项目Demo(10) --JVM调优
- iis日志和tomcat访问日志批量分析demo(python)
- Spark日志分析项目Demo(6)--页面单跳转化率分析
- android读取日志demo
- Spark日志分析项目Demo(4)--RDD使用,用户行为统计分析
- Spark日志分析项目Demo(7)--临时表查询,各区域top3热门商品统计
- Spark日志分析项目Demo(8)--SparkStream,广告点击流量实时统计
- 搭建ELK(ElasticSearch+Logstash+Kibana)日志分析系统(一) Logstash demo演示
- 搭建ELK(ElasticSearch+Logstash+Kibana)日志分析系统(六) ElasticSearch demo演示
- 搭建ELK(ElasticSearch+Logstash+Kibana)日志分析系统(十一) kinaba demo
- 日志分析
- 热修复框架AndFix【源码阅读】
- C#泛型学习
- 汇编语言: 试编写一段程序,从键盘接收一个四位的十六进制数,并在终端上显示与它等值的二 进制数。
- 5-27
- 数据库设计基础原则
- 日志分析demo
- 【sql】sql优化
- 模板方法模式
- [LeetCode]565. Array Nesting
- JavaScript 02
- 排序算法之快速排序
- JVM源码阅读-Dalvik类的加载
- OpenCV直方图(1)
- 线性布局综合案例