Hadoop 编码

来源:互联网 发布:websocket nginx 转发 编辑:程序博客网 时间:2024/06/05 04:56

hadoop 编码


以下是两个 Hadoop 编码的实例,可以通过这两个实例了解 Hadoop MapReduce 的基本编程流程(Mapper、Reducer、作业配置与提交)。

实例一:查找 2013-01-09 号 18:30 的数据。


public static class Map extends Mapper<LongWritable, Text, Text, Text> {


public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String information = value.toString();
String[] informations = information.split(",");
SimpleDateFormat formatter = new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss");
try {
Date beforTime = formatter.parse("2013-01-09 18:30:00");
Date afterTime = formatter.parse("2013-01-09 18:30:59");
Date needTime = formatter.parse(informations[1]);


if (needTime.getTime() > beforTime.getTime()
&& needTime.getTime() < afterTime.getTime()) {
context.write(new Text("2013-01-09 18:30"), value);
}


} catch (ParseException e) {
context.write(new Text("Error"), value);
}
}
}


public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {

    /**
     * Writes a header line describing the group, then every raw record in it.
     * The "Error" group holds records whose timestamp failed to parse; any
     * other group holds the records that matched the target minute.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String header = key.toString().equals("Error")
                ? "时间格式错误数据"
                : key.toString() + ",一分钟内数据";
        context.write(new Text(header), NullWritable.get());
        for (Text record : values) {
            context.write(record, NullWritable.get());
        }
    }
}


/**
 * Configures and submits the minute-filter job and waits for completion.
 *
 * @param inputPath      inputPath[0] = HDFS input path (may be a glob),
 *                       inputPath[1] = HDFS output directory
 * @param NumReduceTasks number of reduce tasks to run
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();

    String[] ioArgs = new String[] { inputPath[0], inputPath[1] };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
            .getRemainingArgs();

    // MapReduce refuses to start if the output directory already exists,
    // so remove any leftover output from a previous run first.
    FileSystem fs = FileSystem.get(conf);
    Path outputDir = new Path(inputPath[1]);
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo1.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    // BUG FIX: the reducer is declared Reducer<Text, Text, Text, NullWritable>
    // and emits NullWritable values, so the job's output value class must be
    // NullWritable, not Text.
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}


/**
 * Entry point: runs the minute-filter job on the cluster as remote
 * user "lion", reading /user/lion/input/top* and writing to
 * /user/lion/topTime/ with two reducers.
 */
public static void main(final String[] args) throws IOException,
        InterruptedException {
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("lion");
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            final String inputGlob = "/user/lion/input/top*";
            final String outputDir = "/user/lion/topTime/";
            runJob(new String[] { inputGlob, outputDir }, 2);
            return null;
        }
    });
}


实例二:从文件中找出位于北京、且协议为 TCP 的数据,并统计总条数。



public static class Map extends Mapper<LongWritable, Text, Text, Text> {


public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String information = value.toString();
String[] informations = information.split(",");
if (informations[11].contains("北京")
&& informations[12].equalsIgnoreCase("tcp")) {
context.write(new Text("TCP_北京"), value);
}
}
}




public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {

    /**
     * Writes every matching record, followed by a trailing total-count line.
     *
     * BUG FIX: the {@code Iterable<Text>} that Hadoop passes to reduce() is
     * backed by a forward-only stream and can be traversed ONLY ONCE. The
     * original code counted the values in a first loop and then iterated a
     * second time to write them - the second loop saw an exhausted iterator
     * and emitted nothing, so the matching records were silently lost.
     * Count and write in a single pass instead.
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (Text value : values) {
            context.write(value, NullWritable.get());
            sum++;
        }
        context.write(new Text("总条数为:" + sum + "条"), NullWritable.get());
    }
}


/**
 * Configures and submits the Beijing/TCP filter job and waits for completion.
 *
 * @param inputPath      inputPath[0] = HDFS input path (may be a glob),
 *                       inputPath[1] = HDFS output directory
 * @param NumReduceTasks number of reduce tasks to run
 */
public static void runJob(String inputPath[], int NumReduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();

    String[] ioArgs = new String[] { inputPath[0], inputPath[1] };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
            .getRemainingArgs();

    // The job fails if the output directory already exists; clear it first.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(inputPath[1]), true);

    Job job = new Job(conf, "TopOverTime");
    job.setJarByClass(TopDemo2.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Intermediate (map-side) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reduce-side) key/value types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(NumReduceTasks);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}


public static void main(final String[] args) throws IOException,
InterruptedException {
UserGroupInformation ugi = UserGroupInformation
.createRemoteUser("lion");
ugi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {


String[] files = new String[] { "/user/lion/input/top*",
"/user/lion/topSum/" };
runJob(files, 1);


return null;
}
});
}‘


hadoop 的 mapreduce 编程,主要就是在map中将逻辑写清楚,在reduce中,得到想要的结果。


0 0
原创粉丝点击