Hadoop 编码
来源:互联网 发布:websocket nginx 转发 编辑:程序博客网 时间:2024/06/05 04:56
hadoop 编码
以下是两个 Hadoop 编程实例,可以通过这两个实例了解 Hadoop MapReduce 的基本开发流程。
实例一:查找 2013-01-09 号 18:30 的数据。
public static class Map extends Mapper<LongWritable, Text, Text, Text> {

    // Reused across map() calls; a Mapper instance is driven by a single
    // thread, so the non-thread-safe SimpleDateFormat is safe here.
    private final SimpleDateFormat formatter = new SimpleDateFormat(
            "yyyy-MM-dd HH:mm:ss");

    /**
     * Emits the record under key "2013-01-09 18:30" when the timestamp in
     * field 1 (comma-separated input line) falls inside that minute,
     * inclusive of both boundary seconds. Records with a missing or
     * unparseable timestamp are emitted under key "Error".
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < 2) {
            // No timestamp field at all: route to the error bucket instead
            // of throwing ArrayIndexOutOfBoundsException and killing the task.
            context.write(new Text("Error"), value);
            return;
        }
        try {
            Date beforTime = formatter.parse("2013-01-09 18:30:00");
            Date afterTime = formatter.parse("2013-01-09 18:30:59");
            Date needTime = formatter.parse(fields[1]);
            // Inclusive comparisons: the original ">" / "<" silently dropped
            // records stamped exactly 18:30:00 or 18:30:59.
            if (needTime.getTime() >= beforTime.getTime()
                    && needTime.getTime() <= afterTime.getTime()) {
                context.write(new Text("2013-01-09 18:30"), value);
            }
        } catch (ParseException e) {
            context.write(new Text("Error"), value);
        }
    }
}
public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {
    /**
     * Writes a header line for the key, then echoes every grouped record
     * on its own line with a NullWritable value.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Pick the header once; the echo loop is identical for both keys.
        String header = key.toString().equals("Error")
                ? "时间格式错误数据"
                : key.toString() + ",一分钟内数据";
        context.write(new Text(header), NullWritable.get());
        for (Text record : values) {
            context.write(record, NullWritable.get());
        }
    }
}
/**
 * Configures and runs the time-filter job. inputPath[0] is the input
 * glob, inputPath[1] the output directory (deleted first so reruns
 * succeed).
 *
 * @param inputPath      two-element array: { input path, output path }
 * @param NumReduceTasks number of reduce tasks for the job
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] ioArgs = new String[] { inputPath[0], inputPath[1] };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
            .getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    // Remove stale output so FileOutputFormat does not abort the job.
    fs.delete(new Path(inputPath[1]), true);
    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo1.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    // Bug fix: the Reducer is declared Reducer<Text, Text, Text,
    // NullWritable>, so the job's output value class must be NullWritable —
    // the original Text.class contradicted the reducer's actual output type.
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    job.waitForCompletion(true);
}
/**
 * Entry point: runs the time-filter job as remote user "lion" with two
 * reduce tasks.
 */
public static void main(final String[] args) throws IOException,
        InterruptedException {
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("lion");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            final String inputGlob = "/user/lion/input/top*";
            final String outputDir = "/user/lion/topTime/";
            runJob(new String[] { inputGlob, outputDir }, 2);
            return null;
        }
    });
}
实例二:从文件中筛选出位于北京、且协议为 tcp 的数据。
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
    /**
     * Emits records whose 12th comma-separated field mentions "北京" and
     * whose 13th field equals "tcp" (case-insensitive), grouped under the
     * single key "TCP_北京".
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Guard against short/malformed lines: indices 11 and 12 must exist.
        // The original code threw ArrayIndexOutOfBoundsException on such
        // lines and killed the whole task.
        if (fields.length > 12
                && fields[11].contains("北京")
                && fields[12].equalsIgnoreCase("tcp")) {
            context.write(new Text("TCP_北京"), value);
        }
    }
}
public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {
    /**
     * Echoes every matched record, then appends a trailing line carrying
     * the total record count.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Bug fix: Hadoop's reduce-side values Iterable is single-pass — the
        // original consumed it in a counting loop and then iterated it a
        // second time to write the records, so no record was ever emitted.
        // Count and write in a single pass instead.
        int sum = 0;
        for (Text value : values) {
            context.write(value, NullWritable.get());
            sum++;
        }
        context.write(new Text("总条数为:" + sum + "条"), NullWritable.get());
    }
}
/**
 * Builds and submits the filter/count job. inputPath[0] is the input
 * glob, inputPath[1] the output directory (cleared before the run so the
 * job can be re-executed).
 *
 * @param inputPath      two-element array: { input path, output path }
 * @param NumReduceTasks number of reduce tasks for the job
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] parsed = new GenericOptionsParser(conf,
            new String[] { inputPath[0], inputPath[1] }).getRemainingArgs();
    // Clear any previous output so FileOutputFormat does not abort.
    FileSystem.get(conf).delete(new Path(inputPath[1]), true);
    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo2.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);
    FileOutputFormat.setOutputPath(job, new Path(parsed[1]));
    FileInputFormat.addInputPath(job, new Path(parsed[0]));
    job.waitForCompletion(true);
}
public static void main(final String[] args) throws IOException,
InterruptedException {
UserGroupInformation ugi = UserGroupInformation
.createRemoteUser("lion");
ugi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
String[] files = new String[] { "/user/lion/input/top*",
"/user/lion/topSum/" };
runJob(files, 1);
return null;
}
});
}‘
Hadoop 的 MapReduce 编程,核心在于在 map 阶段把过滤/转换逻辑写清楚,在 reduce 阶段汇总得到想要的结果。
- Hadoop 编码
- 【hadoop】 3005-hadoop对象序列化编码
- Hadoop中的数据编码/解码器
- 直接使用HADOOP-RPC的编码实例
- hadoop-streaming几个常用的错误编码。
- Hadoop序列化与编码浅析
- hadoop读取GBK编码文件解决之道!
- Hadoop的一个变长long编码剖析
- hadoop集群搭建与测试编码
- Hadoop中VIntWritable编码方式解析
- 编码问题及Hadoop中的Text
- hadoop和spark读取GBK编码乱码
- Hadoop 中文编码相关问题 -- mapreduce程序处理GBK编码数据并输出GBK编码数据
- Hadoop 中文编码相关问题 -- mapreduce程序处理GBK编码数据并输出GBK编码数据
- hadoop伪分布的搭建与测试编码
- Hadoop编码解码【压缩解压缩】机制详解(1)
- Hadoop编码解码【压缩解压缩】机制详解(1)
- Hadoop编码解码【压缩解压缩】机制详解(1)
- Java之fail-fast总结(通过arraylist说明其原理及解决办法)
- 【css】清除浮动(clearfix 和 clear)的用法
- 通过Axis整合SSH 项目创建WebService的流程步骤
- 一些解决问题的思维
- Android 蓝牙开发基本流程
- Hadoop 编码
- 谷歌为何成为虚拟运营商服务
- Kaldi随笔(一)
- UDP打洞
- 闲聊CSS之关于clearfix--清除浮动
- Java之Iterator和Enumeration比较
- 求数据结构大神帮我看看程序!!谢谢!!!
- 动态库调用静态库示例讲解(3)
- String类