hadoop入门程序WordCount代码详解

来源：互联网 | 发布：网络舆情监测软件 | 编辑：程序博客网 | 时间：2024/05/22 10:35

输入文件：

file1：

hello world

file2：

hello hadoop

输出文件：

file：

hadoop 1

hello 2

world 1

以下是代码解释，后面有本人的源代码。

1.首先是Mapper类

2.Reducer类

3.Job类

源代码如下：

import java.io.IOException;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;public class Mapper1 extends Mapper<LongWritable, Text, Text, IntWritable> {private IntWritable one=new IntWritable(1);private Text text=new Text();@Overrideprotected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {String[] values = value.toString().split(" ");for(String val:values){text.set(val);context.write(text, one);}}}

import java.io.IOException;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;public class Reducer1 extends Reducer<Text, IntWritable, Text, IntWritable> {@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException, InterruptedException {int sum=0;for(IntWritable i:values){sum=sum+1;}context.write(key, new IntWritable(sum));}}

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;public class Job1 {public static void main(String[] args) throws Exception{// TODO Auto-generated method stubConfiguration conf = new Configuration();Job job=new Job(conf);job.setJarByClass(Job1.class);job.setJobName("wordcount");job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);job.setMapperClass(Mapper1.class);job.setReducerClass(Reducer1.class);job.setInputFormatClass(TextInputFormat.class);          job.setOutputFormatClass(TextOutputFormat.class);                FileInputFormat.addInputPath(job, new Path(args[0]));        FileOutputFormat.setOutputPath(job, new Path(args[1]));        job.waitForCompletion(true);}}

谢谢阅读！

阅读全文

0 0