hadoop mapreduce tomcat日志分析
来源:互联网 发布:java 前景 知乎 编辑:程序博客网 时间:2024/05/16 12:25
1、在windows 本机解压缩 hadoop
2、cmd切换到 hadoop的bin目录 执行 hadoop version 查看是否满足hadoop运行环境
3、cmd切换到hadoop的sbin目录 执行 start-all.cmd 启动 hadoop单机
4、创建hdfs目录
hadoop fs -mkdir /hdfs
5、上传 tomcat 日志
hadoop fs -put f:/tomcat/log/localhost* /hdfs
6、编写mapreduce分析tomcat日志 ,代码结构如图
Mapper_.java
package com.fw.hadoop.example.log;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Maps one Tomcat access-log line to the pair ("clientIp-httpMethod", 1).
 *
 * <p>The emitted value MUST be 1 (standard word-count pattern) so that the
 * reducer/combiner can sum per-key occurrences. The original code emitted 0,
 * which made every final count collapse to the number of combine/reduce
 * passes instead of the real request count.
 */
public class Mapper_ extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Patterns are thread-safe; compile once instead of once per record.
    // IPv4 dotted quad — the original "[0-9,.]*" also matched commas and the
    // empty string, so any line not starting with a digit produced "".
    private static final Pattern IP_PATTERN =
            Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
    private static final Pattern POST_PATTERN = Pattern.compile("] \"POST");
    private static final Pattern GET_PATTERN = Pattern.compile("] \"GET");

    // Reuse writables across map() calls to cut per-record allocations.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text outKey = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String method = "NONE";
        // Preserve original precedence: a line matching both ends up as GET.
        if (post(line)) {
            method = "POST";
        }
        if (get(line)) {
            method = "GET";
        }
        outKey.set(ip(line) + "-" + method);
        context.write(outKey, ONE);
    }

    /** Returns the first IPv4 address found in {@code str}, or "" if none. */
    public static String ip(String str) {
        Matcher matcher = IP_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(0) : "";
    }

    /** True when the line records a POST request (Tomcat common-log format). */
    public static boolean post(String str) {
        return POST_PATTERN.matcher(str).find();
    }

    /** True when the line records a GET request (Tomcat common-log format). */
    public static boolean get(String str) {
        return GET_PATTERN.matcher(str).find();
    }
}
Reducer_.java
package com.fw.hadoop.example.log;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Sums the per-record 1s emitted by {@code Mapper_} into a total request
 * count per "ip-method" key (standard word-count reducer).
 *
 * <p>This class is also registered as the job's combiner, so the operation
 * must be a pure associative sum. The original implementation initialized
 * {@code count} to 1, which added a spurious +1 on every combine pass and
 * again in the reduce pass, corrupting the final counts.
 */
public class Reducer_ extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0; // start at 0 — combiner-safe pure sum
        for (IntWritable val : values) {
            count += val.get();
        }
        context.write(key, new IntWritable(count));
    }
}
Main.java
package com.fw.hadoop.example.log;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job driver: counts Tomcat access-log requests per "ip-method" key.
 *
 * <p>Usage: {@code hadoop jar tomcat-log.jar com.fw.hadoop.example.log.Main
 * [inputGlob] [outputDir]}. Both arguments are optional; when omitted the
 * original hard-coded HDFS paths are used, so existing invocations keep
 * working unchanged. The output directory must not already exist.
 */
public class Main {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "tomcat日志分析");
        job.setJarByClass(Main.class);
        job.setMapperClass(Mapper_.class);
        // Reusing the reducer as a combiner is only valid because it is a
        // pure associative sum.
        job.setCombinerClass(Reducer_.class);
        job.setReducerClass(Reducer_.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Optional CLI overrides; defaults preserve the original behavior.
        String input = args.length > 0 ? args[0] : "hdfs://0.0.0.0:19000/hdfs/localhost*";
        String output = args.length > 1 ? args[1] : "hdfs://0.0.0.0:19000/hdfs/log1";
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
执行之后查看 http://localhost:50070/explorer.html#/hdfs/log1
我本机日志分析结果为 :
-NONE 2
1.192.34.102-GET 2
1.193.127.216-GET 2
101.199.108.119-GET 2
101.199.108.120-GET 2
101.199.108.52-GET 2
101.199.108.54-GET 2
101.199.112.45-GET 2
101.199.112.52-GET 2
101.226.102.140-GET 3
101.226.102.145-GET 2
101.226.102.146-GET 3
101.226.102.237-GET 2
101.226.102.52-GET 2
101.226.102.79-GET 2
101.226.102.89-GET 2
101.226.102.94-GET 2
101.226.102.97-GET 2
101.226.114.166-GET 2
101.226.125.109-GET 2
101.226.125.113-GET 2
101.226.125.118-GET 2
101.226.125.119-GET 2
101.226.125.120-GET 2
101.226.125.15-GET 2
101.226.125.18-GET 2
101.226.125.19-GET 2
101.226.33.218-GET 2
101.226.33.220-GET 2
101.226.33.223-GET 2
101.226.65.102-GET 2
101.226.66.173-GET 2
101.226.66.177-GET 2
101.226.66.178-GET 2
101.226.66.181-GET 2
101.226.69.109-GET 2
101.226.69.112-GET 2
101.226.85.67-GET 2
101.226.89.14-GET 2
101.226.93.201-GET 2
101.226.93.241-GET 2
101.226.99.196-GET 2
103.221.141.147-GET 3
103.221.141.147-POST 3
106.120.160.109-GET 4
106.120.161.68-GET 3
112.65.193.15-GET 2
115.60.62.127-GET 2
117.185.27.113-GET 4
117.185.27.115-GET 4
140.207.118.16-GET 2
140.207.185.123-GET 2
140.207.185.125-GET 3
140.207.185.126-GET 2
140.207.54.140-GET 2
140.207.54.144-GET 2
140.207.54.158-GET 4
140.207.54.199-GET 2
140.207.54.218-GET 3
140.207.63.102-GET 2
140.207.63.103-GET 2
163.177.82.107-GET 2
163.177.82.107-NONE 2
171.10.205.79-POST 2
171.10.4.159-GET 2
171.10.4.159-POST 2
171.10.69.92-GET 2
171.10.69.92-POST 2
171.10.92.181-GET 2
171.10.92.181-POST 2
171.11.2.238-GET 2
171.11.2.238-POST 2
171.11.3.151-GET 2
171.11.3.151-POST 2
171.11.4.91-GET 2
172.16.30.1-GET 8
172.16.30.1-POST 8
182.118.20.156-GET 2
183.12.116.19-GET 2
183.12.116.19-POST 2
183.57.53.222-GET 2
192.168.240.224-GET 2
192.168.31.131-GET 3
192.168.31.193-GET 3
192.168.31.193-POST 3
192.168.32.100-GET 5
192.168.32.100-POST 2
192.168.32.108-GET 5
192.168.32.111-GET 2
192.168.32.111-POST 2
192.168.32.20-GET 4
192.168.32.20-POST 2
192.168.32.223-GET 2
192.168.32.37-GET 3
192.168.32.37-POST 2
192.168.32.41-GET 6
192.168.32.41-POST 5
192.168.32.63-GET 3
220.181.132.196-GET 2
222.66.141.10-GET 2
223.104.105.29-GET 2
223.104.105.29-POST 2
59.58.193.90-GET 2
59.58.193.90-POST 2
59.78.209.100-GET 2
61.151.217.45-GET 2
61.151.226.16-GET 2
61.151.226.191-GET 2
61.158.148.109-POST 2
61.158.148.116-GET 2
61.158.148.116-POST 2
61.158.148.43-GET 2
61.158.148.43-POST 2
61.158.148.48-GET 2
61.158.148.48-POST 2
61.158.148.51-POST 3
61.158.148.90-POST 2
61.158.149.129-POST 2
61.158.149.147-GET 2
61.158.149.147-POST 2
61.158.149.169-GET 2
61.158.149.169-POST 2
61.158.149.190-GET 2
61.158.149.190-POST 2
61.158.149.230-GET 2
61.158.149.230-POST 2
61.158.149.239-GET 2
61.158.149.239-POST 2
61.158.149.26-POST 2
61.158.149.29-POST 2
61.158.149.47-GET 2
61.158.149.47-POST 2
61.158.152.100-GET 2
61.158.152.100-POST 2
61.158.152.13-GET 2
61.158.152.13-POST 2
61.158.152.57-GET 2
61.158.152.57-POST 2
61.158.152.76-POST 2
61.178.77.18-GET 6
61.178.77.18-POST 4
8、如果想分析本地文件，那么更简单，设置 FileInputFormat 和 FileOutputFormat 的时候，直接使用本地完整路径即可。
在本地运行的时候,hadoop只是作为一个java进程存在!
package com.fw.hadoop.example.log;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job driver for local-filesystem analysis: reads Tomcat logs from a local
 * directory and writes the per-"ip-method" request counts to a local output
 * directory. When run this way Hadoop executes as a plain Java process.
 */
public class Main {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "tomcat日志分析");
        job.setJarByClass(Main.class);
        job.setMapperClass(Mapper_.class);
        job.setCombinerClass(Reducer_.class);
        job.setReducerClass(Reducer_.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Local paths instead of the HDFS URIs
        // (hdfs://0.0.0.0:19000/hdfs/localhost* -> /hdfs/log1) used before.
        Path inputGlob = new Path("F:\\scrt_downLoad\\localhost*");
        Path outputDir = new Path("F:\\10");
        FileInputFormat.addInputPath(job, inputGlob);
        FileOutputFormat.setOutputPath(job, outputDir);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
9、将这个job打包为jar,然后传到hadoop服务器,使用命令行执行
eclipse -> export -jar 选择 这三个类 Main.java 、Reducer_.java、Mapper_.java. -> 名字:tomcat-log.jar
hadoop 执行jar命令
hadoop jar tomcat-log.jar com.fw.hadoop.example.log.Main
10、如果遇到无法编译,则需要重写
org.apache.hadoop.io.nativeio.NativeIO类,去掉磁盘读写权限判定。
- hadoop mapreduce tomcat日志分析
- hadoop mapreduce分析访问日志
- Hadoop分析tomcat访问日志
- 使用python构建基于hadoop的mapreduce日志分析平台
- Hadoop-03-第二个MapReduce程序--模拟分析购物日志
- Hadoop之MapReduce 分析
- Mapreduce hadoop 分析
- 日志分析 mapreduce sogou
- hadoop配置Mapreduce job日志
- Hadoop MapReduce容错性分析
- HADOOP:MapReduce源码分析总结
- Hadoop MapReduce容错性分析
- Hadoop MapReduce容错性分析
- hadoop源码分析(MapReduce)
- Hadoop MapReduce容错性分析
- Hadoop之MapReduce WordCount分析
- Hadoop之MapReduce程序分析
- Hadoop MapReduce之MapOutputBuffer分析
- JAVA学习小结(四)工厂模式区别
- 什么是融资
- JavaScript中的Window对象(弹出确认框)
- 网红曝曾“包养”薛之谦 被骗钱骗感情给前妻买房
- js算法--进阶
- hadoop mapreduce tomcat日志分析
- 关于Android Studio导入项目时一直building的问题
- struts中自定义标签引入到jsp中报错问题,不能引入问题
- 第一个博客文章
- Ubuntu下,使用mavlink_shell.py连接pixhawk
- 初始化bar页面和选项卡点击监听
- 数据结构-单链表按序号删除
- [转]QTableView 双击 获取 一行数据
- k个最小和 K路归并问题