Understanding Hadoop Source Code --- WordCount


Gradle build script:


group 'yqg'
version '1.0-SNAPSHOT'

apply plugin: 'java'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

dependencies {
    testCompile group: 'junit', name: 'junit', version: '4.12'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
    compile group: 'org.apache.hadoop', name: 'hadoop-common', version: '2.8.1'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core
    // Note: this CDH build of hadoop-core is hosted in Cloudera's repository, not Maven Central.
    compile group: 'org.apache.hadoop', name: 'hadoop-core', version: '2.6.0-mr1-cdh5.12.1', ext: 'pom'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs
    compile group: 'org.apache.hadoop', name: 'hadoop-hdfs', version: '2.8.1'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
    compile group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '2.8.1'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api
    compile group: 'org.apache.hadoop', name: 'hadoop-yarn-api', version: '2.8.1'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-jobclient
    // 'provided' is not a configuration of the plain 'java' plugin; compileOnly is the closest equivalent.
    compileOnly group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-jobclient', version: '2.8.1'
    compile group: 'org.apache.hadoop', name: 'hadoop-mapreduce', version: '2.8.1', ext: 'pom'
}
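If the dependencies above resolve, the hadoop-common classes are on the compile classpath. A minimal sanity check under that assumption (the class name ClasspathCheck is only illustrative, not part of the original project): VersionInfo ships with hadoop-common, so this snippet only compiles and runs when the declared artifacts were actually downloaded.

// Illustrative classpath check; ClasspathCheck is a made-up name, not part of the project.
import org.apache.hadoop.util.VersionInfo;

public class ClasspathCheck {
    public static void main(String[] args) {
        // Prints e.g. "2.8.1" when hadoop-common 2.8.1 is on the classpath.
        System.out.println("Hadoop version on classpath: " + VersionInfo.getVersion());
    }
}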



package wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * @author Ryan
 */
public class WordCount {

    /** Mapper: for every whitespace-separated token in a line, emit (word, 1). */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    /** Reducer: sum the counts for each word and emit (word, total). */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "wordcount");
        // Ship the jar containing this class to the cluster so the mapper/reducer can be found.
        job.setJarByClass(WordCount.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // args[0]: input path on HDFS; args[1]: output directory (must not exist yet).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
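To see what the job actually computes, here is a small local dry run of the same word-count logic in plain Java, with no cluster involved. It is only a sketch of the map/shuffle/reduce data flow: the sample input lines are made up, and the tab-separated output mirrors what TextOutputFormat writes by default into the reducer's output file.

// Local dry run of the word-count logic (illustrative only; the sample input is made up).
import java.util.StringTokenizer;
import java.util.TreeMap;

public class WordCountDryRun {
    public static void main(String[] args) {
        String[] lines = {"hello hadoop", "hello world"};   // stand-in for the input split
        TreeMap<String, Integer> counts = new TreeMap<>();  // sorted keys, like the shuffle/sort phase

        // "map" phase: tokenize each line and emit (word, 1); the "reduce" phase
        // is folded in directly by summing the counts per word as we go.
        for (String line : lines) {
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                counts.merge(tokenizer.nextToken(), 1, Integer::sum);
            }
        }

        // TextOutputFormat writes "key<TAB>value" per line by default, so the real
        // job's output would look like:
        // hadoop   1
        // hello    2
        // world    1
        counts.forEach((word, count) -> System.out.println(word + "\t" + count));
    }
}

To run the real job, package the classes into a jar (for example with Gradle's jar task) and submit it with the hadoop jar command, passing the input path and a not-yet-existing output directory as the two program arguments.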


