Hadoop MapReduce Tomcat log analysis


1. Extract the Hadoop distribution on the local Windows machine.


2. In cmd, change to Hadoop's bin directory and run hadoop version to check that the Hadoop runtime environment is set up correctly.

3. In cmd, change to Hadoop's sbin directory and run start-all.cmd to start Hadoop in single-node mode.

4. Create an HDFS directory:

hadoop fs -mkdir /hdfs

5. Upload the Tomcat logs:

hadoop fs -put  f:/tomcat/log/localhost*  /hdfs
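
To confirm the upload, list the target directory:

hadoop fs -ls /hdfs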

6. Write the MapReduce job that analyzes the Tomcat logs. It consists of the three classes below.


Mapper_.java

package com.fw.hadoop.example.log;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Mapper_ extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Emits one record per log line, keyed by "ip-METHOD", with a value of 0.
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String method = "NONE";
        String ip = ip(line);
        if (post(line)) {
            method = "POST";
        }
        if (get(line)) {
            method = "GET";
        }
        context.write(new Text(ip + "-" + method), new IntWritable(0));
    }

    // Extracts the leading client IP (digits and dots) from the log line.
    public static String ip(String str) {
        Pattern pattern = Pattern.compile("[0-9,.]*");
        Matcher matcher = pattern.matcher(str);
        String result = "";
        if (matcher.find()) {
            result = matcher.group(0);
        }
        return result;
    }

    // True if the line contains a POST request.
    public static boolean post(String str) {
        Pattern pattern = Pattern.compile("] \"POST");
        Matcher matcher = pattern.matcher(str);
        return matcher.find();
    }

    // True if the line contains a GET request.
    public static boolean get(String str) {
        Pattern pattern = Pattern.compile("] \"GET");
        Matcher matcher = pattern.matcher(str);
        return matcher.find();
    }
}
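
As a quick way to see what the mapper extracts, the small class below (not part of the original post; the sample log line is made up but follows the common Tomcat access-log format) feeds one line through the static helpers:

package com.fw.hadoop.example.log;

// Quick local sanity check of the parsing helpers in Mapper_.
public class ParseCheck {
    public static void main(String[] args) {
        String line = "192.168.32.41 - - [10/Oct/2017:13:55:36 +0800] \"GET /index.jsp HTTP/1.1\" 200 2326";
        System.out.println(Mapper_.ip(line));   // prints 192.168.32.41
        System.out.println(Mapper_.get(line));  // prints true
        System.out.println(Mapper_.post(line)); // prints false
    }
}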



Reducer_.java

package com.fw.hadoop.example.log;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reducer_ extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Sums the values for each "ip-METHOD" key. Note that the count starts at 1
    // (the mapper emits 0), and the same class is also registered as the combiner
    // in Main, so each combine/reduce pass adds 1 to the total for a key.
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 1;
        for (IntWritable val : values) {
            count += val.get();
        }
        context.write(key, new IntWritable(count));
    }
}
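
To make the arithmetic concrete (my reading of the code, assuming the combiner runs once per map task): the mapper emits (192.168.32.41-GET, 0) for each matching line in a split, the combiner collapses that group to (192.168.32.41-GET, 1), and the reducer then outputs (192.168.32.41-GET, 2). This is why almost every value in the result listing further down is at least 2.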



Main.java

package com.fw.hadoop.example.log;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Main {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "tomcat log analysis");
        job.setJarByClass(Main.class);
        job.setMapperClass(Mapper_.class);
        job.setCombinerClass(Reducer_.class);   // the reducer is reused as the combiner
        job.setReducerClass(Reducer_.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Read the uploaded log files from HDFS and write the result to /hdfs/log1
        FileInputFormat.addInputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/localhost*"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/log1"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
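
Note that the output path (/hdfs/log1 here) must not exist before the job starts; FileOutputFormat fails if the directory is already there. When re-running the job, delete it first:

hadoop fs -rm -r /hdfs/log1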




7. After the job runs, view the output at http://localhost:50070/explorer.html#/hdfs/log1
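
You can also dump the result from the command line; with the default single reducer the output lands in one part file:

hadoop fs -cat /hdfs/log1/part-r-00000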

The analysis result for the logs on my machine:


-NONE 2
1.192.34.102-GET 2
1.193.127.216-GET 2
101.199.108.119-GET 2
101.199.108.120-GET 2
101.199.108.52-GET 2
101.199.108.54-GET 2
101.199.112.45-GET 2
101.199.112.52-GET 2
101.226.102.140-GET 3
101.226.102.145-GET 2
101.226.102.146-GET 3
101.226.102.237-GET 2
101.226.102.52-GET 2
101.226.102.79-GET 2
101.226.102.89-GET 2
101.226.102.94-GET 2
101.226.102.97-GET 2
101.226.114.166-GET 2
101.226.125.109-GET 2
101.226.125.113-GET 2
101.226.125.118-GET 2
101.226.125.119-GET 2
101.226.125.120-GET 2
101.226.125.15-GET 2
101.226.125.18-GET 2
101.226.125.19-GET 2
101.226.33.218-GET 2
101.226.33.220-GET 2
101.226.33.223-GET 2
101.226.65.102-GET 2
101.226.66.173-GET 2
101.226.66.177-GET 2
101.226.66.178-GET 2
101.226.66.181-GET 2
101.226.69.109-GET 2
101.226.69.112-GET 2
101.226.85.67-GET 2
101.226.89.14-GET 2
101.226.93.201-GET 2
101.226.93.241-GET 2
101.226.99.196-GET 2
103.221.141.147-GET 3
103.221.141.147-POST 3
106.120.160.109-GET 4
106.120.161.68-GET 3
112.65.193.15-GET 2
115.60.62.127-GET 2
117.185.27.113-GET 4
117.185.27.115-GET 4
140.207.118.16-GET 2
140.207.185.123-GET 2
140.207.185.125-GET 3
140.207.185.126-GET 2
140.207.54.140-GET 2
140.207.54.144-GET 2
140.207.54.158-GET 4
140.207.54.199-GET 2
140.207.54.218-GET 3
140.207.63.102-GET 2
140.207.63.103-GET 2
163.177.82.107-GET 2
163.177.82.107-NONE 2
171.10.205.79-POST 2
171.10.4.159-GET 2
171.10.4.159-POST 2
171.10.69.92-GET 2
171.10.69.92-POST 2
171.10.92.181-GET 2
171.10.92.181-POST 2
171.11.2.238-GET 2
171.11.2.238-POST 2
171.11.3.151-GET 2
171.11.3.151-POST 2
171.11.4.91-GET 2
172.16.30.1-GET 8
172.16.30.1-POST 8
182.118.20.156-GET 2
183.12.116.19-GET 2
183.12.116.19-POST 2
183.57.53.222-GET 2
192.168.240.224-GET 2
192.168.31.131-GET 3
192.168.31.193-GET 3
192.168.31.193-POST 3
192.168.32.100-GET 5
192.168.32.100-POST 2
192.168.32.108-GET 5
192.168.32.111-GET 2
192.168.32.111-POST 2
192.168.32.20-GET 4
192.168.32.20-POST 2
192.168.32.223-GET 2
192.168.32.37-GET 3
192.168.32.37-POST 2
192.168.32.41-GET 6
192.168.32.41-POST 5
192.168.32.63-GET 3
220.181.132.196-GET 2
222.66.141.10-GET 2
223.104.105.29-GET 2
223.104.105.29-POST 2
59.58.193.90-GET 2
59.58.193.90-POST 2
59.78.209.100-GET 2
61.151.217.45-GET 2
61.151.226.16-GET 2
61.151.226.191-GET 2
61.158.148.109-POST 2
61.158.148.116-GET 2
61.158.148.116-POST 2
61.158.148.43-GET 2
61.158.148.43-POST 2
61.158.148.48-GET 2
61.158.148.48-POST 2
61.158.148.51-POST 3
61.158.148.90-POST 2
61.158.149.129-POST 2
61.158.149.147-GET 2
61.158.149.147-POST 2
61.158.149.169-GET 2
61.158.149.169-POST 2
61.158.149.190-GET 2
61.158.149.190-POST 2
61.158.149.230-GET 2
61.158.149.230-POST 2
61.158.149.239-GET 2
61.158.149.239-POST 2
61.158.149.26-POST 2
61.158.149.29-POST 2
61.158.149.47-GET 2
61.158.149.47-POST 2
61.158.152.100-GET 2
61.158.152.100-POST 2
61.158.152.13-GET 2
61.158.152.13-POST 2
61.158.152.57-GET 2
61.158.152.57-POST 2
61.158.152.76-POST 2
61.178.77.18-GET 6
61.178.77.18-POST 4


8. If you want to analyze local files instead, it is even simpler: when setting the FileInputFormat and FileOutputFormat paths, just use full local paths.

When running locally, Hadoop exists only as a single Java process!


package com.fw.hadoop.example.log;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Main {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "tomcat log analysis");
        job.setJarByClass(Main.class);
        job.setMapperClass(Mapper_.class);
        job.setCombinerClass(Reducer_.class);
        job.setReducerClass(Reducer_.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // HDFS paths from step 6:
        // FileInputFormat.addInputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/localhost*"));
        // FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:19000/hdfs/log1"));
        // Local file system paths instead:
        FileInputFormat.addInputPath(job, new Path("F:\\scrt_downLoad\\localhost*"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\10"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
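
If the local output directory (F:\10 in the example) is left over from a previous run, the job fails for the same reason as in step 6. A small sketch (my addition, using the standard FileSystem API; adjust the path to match yours) for clearing it programmatically before the run:

package com.fw.hadoop.example.log;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CleanOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path out = new Path("F:\\10");            // same output path as Main above
        FileSystem fs = out.getFileSystem(conf);  // resolves to the local file system here
        if (fs.exists(out)) {
            fs.delete(out, true);                 // recursive delete of the old output
        }
    }
}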

9. Package the job as a jar, upload it to the Hadoop server, and run it from the command line.

In Eclipse: Export -> JAR file, select the three classes Main.java, Reducer_.java, and Mapper_.java, and name the jar tomcat-log.jar.

Run the jar with the hadoop jar command:

hadoop jar tomcat-log.jar com.fw.hadoop.example.log.Main

10. If the job fails to run (for example with NativeIO errors on Windows), you need to override the org.apache.hadoop.io.nativeio.NativeIO class and remove the disk read/write permission check.
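
A common way to do this (not spelled out in the original post) is to copy the NativeIO.java source from your Hadoop version into the project under the same package, org.apache.hadoop.io.nativeio, and make the Windows access check always pass. In Hadoop 2.x the method to change looks roughly like the following; verify the exact signature against your version's source before editing:

// Inside the copied org.apache.hadoop.io.nativeio.NativeIO.Windows class
// (sketch for Hadoop 2.x; check your version before editing):
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    return true; // skip the native disk read/write permission check
}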