Hadoop 编码

来源:互联网 发布:websocket nginx 转发 编辑:程序博客网 时间:2024/06/05 04:56

hadoop 编码


以下是两个 Hadoop 编码的实例,可以通过这两个实例了解 Hadoop MapReduce 的基本编程流程(Mapper、Reducer、作业配置与提交)。

实例一:查找 2013-01-09 号 18:30 的数据。


public static class Map extends Mapper<LongWritable, Text, Text, Text> {


public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String information = value.toString();
String[] informations = information.split(",");
SimpleDateFormat formatter = new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss");
try {
Date beforTime = formatter.parse("2013-01-09 18:30:00");
Date afterTime = formatter.parse("2013-01-09 18:30:59");
Date needTime = formatter.parse(informations[1]);


if (needTime.getTime() > beforTime.getTime()
&& needTime.getTime() < afterTime.getTime()) {
context.write(new Text("2013-01-09 18:30"), value);
}


} catch (ParseException e) {
context.write(new Text("Error"), value);
}
}
}


public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {

    /**
     * Writes a header line describing the group, then every raw record in it.
     * The "Error" group holds records whose timestamp failed to parse; any
     * other group holds the records that matched the target minute.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String header = key.toString().equals("Error")
                ? "时间格式错误数据"
                : key.toString() + ",一分钟内数据";
        context.write(new Text(header), NullWritable.get());
        for (Text record : values) {
            context.write(record, NullWritable.get());
        }
    }
}


/**
 * Configures and submits the minute-filter job and waits for completion.
 *
 * @param inputPath      inputPath[0] = HDFS input path (may be a glob),
 *                       inputPath[1] = HDFS output directory
 * @param NumReduceTasks number of reduce tasks to run
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();

    String[] ioArgs = new String[] { inputPath[0], inputPath[1] };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
            .getRemainingArgs();

    // MapReduce refuses to start if the output directory already exists,
    // so remove any leftover output from a previous run first.
    FileSystem fs = FileSystem.get(conf);
    Path outputDir = new Path(inputPath[1]);
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo1.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    // BUG FIX: the reducer is declared Reducer<Text, Text, Text, NullWritable>
    // and emits NullWritable values, so the job's output value class must be
    // NullWritable, not Text.
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}


/**
 * Entry point: runs the minute-filter job on the cluster as remote
 * user "lion", reading /user/lion/input/top* and writing to
 * /user/lion/topTime/ with two reducers.
 */
public static void main(final String[] args) throws IOException,
        InterruptedException {
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("lion");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            final String inputGlob = "/user/lion/input/top*";
            final String outputDir = "/user/lion/topTime/";
            runJob(new String[] { inputGlob, outputDir }, 2);
            return null;
        }
    });
}


实例二:从文件中找出位于北京、且协议为 TCP 的数据,并统计总条数。



public static class Map extends Mapper<LongWritable, Text, Text, Text> {


public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String information = value.toString();
String[] informations = information.split(",");
if (informations[11].contains("北京")
&& informations[12].equalsIgnoreCase("tcp")) {
context.write(new Text("TCP_北京"), value);
}
}
}




public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {

    /**
     * Writes every matching record, followed by a trailing total-count line.
     *
     * BUG FIX: the {@code Iterable<Text>} that Hadoop passes to reduce() is
     * backed by a forward-only stream and can be traversed ONLY ONCE. The
     * original code counted the values in a first loop and then iterated a
     * second time to write them - the second loop saw an exhausted iterator
     * and emitted nothing, so the matching records were silently lost.
     * Count and write in a single pass instead.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (Text value : values) {
            context.write(value, NullWritable.get());
            sum++;
        }
        context.write(new Text("总条数为:" + sum + "条"), NullWritable.get());
    }
}


/**
 * Configures and submits the Beijing/TCP filter job and waits for completion.
 *
 * @param inputPath      inputPath[0] = HDFS input path (may be a glob),
 *                       inputPath[1] = HDFS output directory
 * @param NumReduceTasks number of reduce tasks to run
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();

    String[] ioArgs = new String[] { inputPath[0], inputPath[1] };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
            .getRemainingArgs();

    // The job fails if the output directory already exists; clear it first.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(inputPath[1]), true);

    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo2.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Intermediate (map-side) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reduce-side) key/value types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}


public static void main(final String[] args) throws IOException,
InterruptedException {
UserGroupInformation ugi = UserGroupInformation
.createRemoteUser("lion");
ugi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {


String[] files = new String[] { "/user/lion/input/top*",
"/user/lion/topSum/" };
runJob(files, 1);


return null;
}
});
}‘


hadoop 的 mapreduce 编程,主要就是在map中将逻辑写清楚,在reduce中,得到想要的结果。


0 0
原创粉丝点击