Hadoop之——使用hadoop自定义类型处理手机上网日志
来源:互联网 发布:透镜设计软件 编辑:程序博客网 时间:2024/05/22 00:16
转载请注明出处:http://blog.csdn.net/l1028386804/article/details/46046517
不多说,直接上代码
package com.lyz.hadoop.count;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;/** * 利用Hadoop MapRedduce * @author liuyazhuang * */public class KpiApp {/** * 输入地址的路径 */private static final String INPUT_PATH = "hdfs://liuyazhuang:9000/d1/wlan";/** * 计算结果输出的路径 */private static final String OUT_PATH = "hdfs://liuyazhuang:9000/d1/out";public static void main(String[] args) throws Exception{//实例化Job对象Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());//1.1指定输入文件路径FileInputFormat.setInputPaths(job, INPUT_PATH);//指定格式化输入文件的类job.setInputFormatClass(TextInputFormat.class);//1.2指定自定义的Mapper类job.setMapperClass(MyMapper.class);//指定输出<k2, v2>的类型job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(KpiWritable.class);//1.3指定分区类job.setPartitionerClass(HashPartitioner.class);//指定任务数量job.setNumReduceTasks(1);//1.4 TODO 排序,分区//1.5 TODO 合并(可选)//2.2指定自定义的reducer类job.setReducerClass(MyReducer.class);//指定输出的<k3, v3>类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(KpiWritable.class);//2.3指定输出的位置FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));//指定输出文件的格式化类job.setOutputFormatClass(TextOutputFormat.class);//把代码提交给JobTracker执行job.waitForCompletion(true);}/** * Mapper * @author liuyazhuang * */static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable>{@Overrideprotected void 
map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, KpiWritable>.Context context)throws IOException, InterruptedException {String[] splited = value.toString().split("\t");String msis = splited[1];Text k2 = new Text(msis);KpiWritable v2 = new KpiWritable(Long.parseLong(splited[6]), Long.parseLong(splited[7]), Long.parseLong(splited[8]), Long.parseLong(splited[9]));context.write(k2, v2);}}/** * Reducer * @author liuyazhuang * */static class MyReducer extends Reducer<Text, KpiWritable, Text, KpiWritable>{@Overrideprotected void reduce(Text k2, Iterable<KpiWritable> v2s, Reducer<Text, KpiWritable, Text, KpiWritable>.Context context) throws IOException, InterruptedException {long upPackNum = 0;long downPackNum = 0;long upPayLoad = 0;long downPayLoad = 0;for (KpiWritable kpiWritable : v2s) {upPackNum += kpiWritable.upPackNum;downPackNum += kpiWritable.downPackNum;upPayLoad += kpiWritable.upPayLoad;downPayLoad += kpiWritable.downPayLoad;}KpiWritable v3 = new KpiWritable(upPackNum, downPackNum, upPayLoad, downPayLoad);context.write(k2, v3);}}}/** * 自定义Hadoop数据类型 * @author liuyazhuang * */class KpiWritable implements Writable{long upPackNum;long downPackNum;long upPayLoad;long downPayLoad;public KpiWritable() {super();}public KpiWritable(long upPackNum, long downPackNum, long upPayLoad, long downPayLoad) {super();this.upPackNum = upPackNum;this.downPackNum = downPackNum;this.upPayLoad = upPayLoad;this.downPayLoad = downPayLoad;}@Overridepublic void write(DataOutput out) throws IOException {out.writeLong(upPackNum);out.writeLong(downPackNum);out.writeLong(upPayLoad);out.writeLong(downPayLoad);}@Overridepublic void readFields(DataInput in) throws IOException {this.upPackNum = in.readLong();this.downPackNum = in.readLong();this.upPayLoad = in.readLong();this.downPayLoad = in.readLong();}@Overridepublic String toString() {return "KpiWritable [upPackNum=" + upPackNum + ", downPackNum="+ downPackNum + ", upPayLoad=" + upPayLoad + ", downPayLoad="+ downPayLoad + 
"]";}}注意:
(1)在eclipse中调用的job.waitForCompletion(true)实际上执行如下方法
connect();
info = jobClient.submitJobInternal(conf);
(2)在connect()方法中,实际上创建了一个JobClient对象。
在调用该对象的构造方法时,获得了JobTracker的客户端代理对象JobSubmissionProtocol。
JobSubmissionProtocol的实现类是JobTracker。(注意:以上是Hadoop 1.x/MRv1的提交流程;在YARN/MRv2中,作业提交由JobSubmitter与ResourceManager完成,不再存在JobTracker。)
(3)在jobClient.submitJobInternal(conf)方法中,调用了
JobSubmissionProtocol.submitJob(...),
即执行的是JobTracker.submitJob(...)。
(4)Hadoop的数据类型要求必须实现Writable接口。
(5)java基本类型与Hadoop常见基本类型的对照
Long LongWritable
Integer IntWritable
Boolean BooleanWritable
String Text
java类型如何转化为hadoop基本类型
调用hadoop类型的构造方法,或者调用set()方法。
new LongWritable(123L);
hadoop基本类型如何转化为java类型
对于Text,需要调用toString()方法,其他类型调用get()方法。
0 0
- Hadoop之——使用hadoop自定义类型处理手机上网日志
- Hadoop学习笔记—5.自定义类型处理手机上网日志
- Hadoop自定义类型处理手机上网日志(笔记5)
- Hadoop日志类型总结
- 实践:使用 Apache Hadoop 处理日志
- 实践:使用 Apache Hadoop 处理日志
- 实践:使用 Apache Hadoop 处理日志
- hadoop 添加自定义日志
- HADOOP 处理 NGINX 日志
- hadoop 处理日志
- Hadoop之——自定义计数器
- hadoop之日志分析
- hadoop 使用log4j日志
- Hadoop自定义输出日志log4j
- Hadoop 技术在电信运营商上网日志处理中的应用架构
- Hadoop 笔记之创建自定义分区---手机流量统计
- Hadoop详解(七)——Hive的原理和安装配置和UDF,flume的安装和配置以及简单使用,flume+hive+Hadoop进行日志处理
- Hadoop学习日志之hadoop的组成
- 编程细节 - 1
- error MSB3073::VCEnd”已退出,代码为 4
- Hibernate级联查找
- javascript操作select 组件 opetion的增减
- vs2010 error C2001: 常量中有换行符
- Hadoop之——使用hadoop自定义类型处理手机上网日志
- 使用反射来获取枚举的名称、值和特性
- ubuntu修改IP地址和网关
- 第十二周 项目2-摩托车继承自行车和机动车
- 不用翻墙搞定android开发环境
- Javah提示未找到 ..的类文件
- 关于FragmentManager动态管理Fragment时Fragment生命周期的探究
- 未来码农或可以备份一个自己的大脑
- Maven 编译打包时如何忽略测试用例