Hadoop MapReduce 简单案例--求素数个数

来源:互联网 发布:java常见注解 编辑:程序博客网 时间:2024/05/22 01:28
package test;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that counts how many of the whitespace-separated integers in
 * the input are prime.
 *
 * <p>The mapper only forwards every parsed number under one shared key; the
 * reducer performs the primality test and writes a single
 * {@code ("Sum", count)} record.
 */
public class Prime {

    /**
     * Returns whether {@code n} is a prime number.
     *
     * <p>Uses 6k±1 trial division up to the square root of {@code n}. The
     * divisor is a {@code long}, so candidates above
     * {@code Integer.MAX_VALUE} are classified correctly.
     *
     * @param n the candidate value; anything below 2 is not prime
     * @return {@code true} if {@code n} is prime, {@code false} otherwise
     */
    static boolean isPrime(long n) {
        if (n < 2) {
            return false;
        }
        if (n < 4) {
            return true; // 2 and 3
        }
        if (n % 2 == 0 || n % 3 == 0) {
            return false;
        }
        // "d <= n / d" is "d * d <= n" written so that it cannot overflow.
        for (long d = 5; d <= n / d; d += 6) {
            if (n % d == 0 || n % (d + 2) == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Map phase: only passes values through. Every token of the input line is
     * parsed as a {@code long} and emitted under the same (empty) key, so all
     * numbers reach a single {@code reduce} call.
     */
    public static class SumMapper extends Mapper<Object, Text, Text, LongWritable> {

        /** Shared output key; intentionally left empty. */
        private final Text word = new Text();

        /** Reused output value holding the number parsed from the current token. */
        private final LongWritable outValue = new LongWritable();

        /**
         * Emits one record per whitespace-separated token of {@code value}.
         *
         * @param key     input key; not used
         * @param value   one line of input text
         * @param context sink for the emitted {@code (word, number)} pairs
         * @throws NumberFormatException if a token is not a valid {@code long}
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                outValue.set(Long.parseLong(itr.nextToken()));
                context.write(word, outValue);
            }
        }
    }

    /**
     * Reduce phase: decides which of the received numbers are prime and
     * writes how many there are.
     */
    public static class SumReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /** Reused output value holding the prime count. */
        private final LongWritable result = new LongWritable();

        /** Fixed output key. */
        private final Text m1 = new Text("Sum");

        /**
         * Counts the primes among {@code values} and writes
         * {@code ("Sum", count)}.
         *
         * @param key     grouping key from the map phase; not used
         * @param values  the numbers emitted by the mappers for this key
         * @param context sink for the output record
         */
        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable val : values) {
                if (isPrime(val.get())) {
                    sum++;
                }
            }
            result.set(sum);
            context.write(m1, result);
        }
    }

    /**
     * Configures and runs the job, then prints the elapsed wall-clock time in
     * milliseconds.
     *
     * <p>Exits with status 2 on wrong usage, 0 when the job succeeds and 1
     * when it fails.
     *
     * @param args generic Hadoop options followed by {@code <in> <out>}
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Prime <in> <out>");
            System.exit(2);
        }

        long startTime = System.currentTimeMillis(); // start of the timed section

        Job job = new Job(conf);
        job.setJarByClass(Prime.class);
        job.setMapperClass(SumMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Keep the outcome so that a failed job is reflected in the exit status.
        boolean succeeded = job.waitForCompletion(true);

        long endTime = System.currentTimeMillis();
        System.out.println("time=" + (endTime - startTime));
        System.exit(succeeded ? 0 : 1);
    }
}

实现过程:

建立输入文件夹 in,并在其中建立文档 1。

文档1中的数据:

1 2 3 4 5 6 7 8 9 10

11 12 13 14 15 16 17 18 19 20

结果: