Hadoop WordCount程序

来源：互联网发布：澳门网络真人博客编辑：程序博客网时间：2024/06/05 19:10
Hadoop WordCount程序

--aaa.txt
hello world hadoop
hello lhj hadoop
good luck to lhj
nice to me lhj

--Mapper
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map stage of the word count job.
 *
 * <p>Splits each input line into tokens using the default {@link StringTokenizer}
 * delimiters (space, tab, newline, carriage return, form feed) and emits one
 * {@code (token, 1)} pair per token.
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Constant count written for every token. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Reused output key; avoids allocating a new Text per token. This is the same
     * pattern the official Hadoop WordCount example uses.
     */
    private final Text word = new Text();

    /**
     * Emits {@code (token, 1)} for every token found in {@code value}.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   one line of input text
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}

--Reducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce stage of the word count job: sums all counts received for a word and
 * emits {@code (word, total)}.
 */
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Reused output value; avoids allocating a new IntWritable per key. */
    private final IntWritable total = new IntWritable();

    /**
     * Adds up every count in {@code iterable} and writes the sum for {@code key}.
     *
     * @param key      the word being counted
     * @param iterable all counts emitted for that word
     * @param context  sink for the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> iterable, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : iterable) {
            sum += count.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}

--JobRun
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the word count job.
 *
 * <p>Reads from {@code /user/hadoop/input/wc} and writes to
 * {@code /user/hadoop/output/wc}.
 */
public class JobRun {

    /**
     * Configures the job, waits for it to finish and terminates the JVM.
     *
     * <p>Exit status is 0 only when the job completes successfully. It is 1 when
     * the job reports failure or when an exception is caught during setup or
     * execution, so calling scripts can detect the failure.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Disabled overrides kept from the original article. Presumably they are for
        // submitting from a machine outside the cluster; the values are site-specific.
        //conf.set("fs.default.name", "hdfs://node1:9000");
        //conf.set("mapred.job.tracker", "node1:9001");
        //conf.set("mapred.jar", "C:\\Documents and Settings\\Administrator\\桌面");

        // Assume failure until the job reports success, so that an exception
        // does not end the process with status 0.
        int exitCode = 1;
        try {
            // NOTE(review): on Hadoop 2+ this constructor is deprecated in favour of
            // Job.getInstance(conf). It is kept because the article targets a
            // JobTracker-based cluster; confirm the target version before switching.
            Job job = new Job(conf);
            job.setJarByClass(JobRun.class);

            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReduce.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Declare the reducer's output types explicitly as well, so the job's
            // declared output matches what WcReduce writes.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            job.setNumReduceTasks(1); // 1 is also the default

            FileInputFormat.addInputPath(job, new Path("/user/hadoop/input/wc"));
            FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output/wc"));

            exitCode = job.waitForCompletion(true) ? 0 : 1;
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.exit(exitCode);
    }
}

--上传jar包
hadoop fs -put wc.jar /user/hadoop/input/wc

--hadoop中执行
hadoop jar wc.jar JobRun       //wc.jar是导出的jar包，如果有包的话要加上包名, JobRun是有main方法的类

--监控执行的进度
http://node1:50030/jobtracker.jsp

[hadoop@node1 ~]$ hadoop fs -cat /user/hadoop/output/wc/part-r-00000
good    1
hadoop  2
hello   2
lhj     3
luck    1
me      1
nice    1
to      2
world   1
0 0