MapReduce程序详解

来源:互联网 发布:梦幻西游mac手机 编辑:程序博客网 时间:2024/06/01 07:28
package exp1.hadoop;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class Answer2Exp2 {public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable>{ private static final byte userIndex = 2;private Text mapKey = new Text();public void map(LongWritable key, Text value,Context context)throws IOException, InterruptedException {// TODO Auto-generated method stub            String tmp;String  line = value.toString();//将文本转成字符串类型            String [] lineSplit = line.split("\t");//按tap键进行分片并存储起来,相当于linesplit数组里每一个代表一列            tmp = lineSplit[userIndex];//得到目标列            mapKey.set(tmp);//转成text格式            context.write(mapKey, new IntWritable(1));    //写入context       }}public static class ReduceClass extends Reducer<Text, Iterable<IntWritable>, Text, IntWritable>{private IntWritable totalCnt=new IntWritable();public void reduce(Text mapKey, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// TODO Auto-generated method stubint sum=0;for(IntWritable value : values){sum+=value.get();//对相同key进行累加计数}totalCnt.set(sum);context.write(mapKey, totalCnt);}}public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration conf = new Configuration();//定义一个默认的配置Job job = Job.getInstance(conf);//初始化一个jobjob.setJarByClass(Answer2Exp2.class);//设置jar类job.setNumReduceTasks(1);//设置reduce任务的数量job.setJobName("User 
Statistical");//设置job名称//job.setMapperClass(MapClass.class);//设置map类//job.setReducerClass(ReduceClass.class);//设置reduce类job.setMapOutputKeyClass(Text.class);//设置map输出key类,记得与<>里的格式类型一致,此时为text。job.setMapOutputValueClass(IntWritable.class);//设置map输出value类,格式!job.setOutputKeyClass(Text.class);//最终输出的Key类型类,如上job.setOutputValueClass(IntWritable.class);//最终输出的value类,如上 FileInputFormat.addInputPath(job, new Path("F:/data/input/userurl_20150911"));//输入在本地         FileOutputFormat.setOutputPath(job, new Path("F:/data/output/o3"));//输出在本地
//可以用paths输出多个路径,用逗号隔开//FileInputFormat.addInputPath(job, new Path (args[0]));//FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1);//判断job是否完成,完成为1,未完成为0. }}