hadoop之"hello world"--wordCount
来源:互联网 发布:淘宝客程序2.0 编辑:程序博客网 时间:2024/05/17 01:47
hadoop版本:1.2.1;开发IDE:eclipse。
编写map函数,继承Mapper类。
package com.cjh.hadoop;import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;public class WordMapper extends Mapper<Object, Text, Text, IntWritable>{private final static IntWritable one = new IntWritable(1);private Text word = new Text() ; @Overrideprotected void map(Object key, Text value,Context context)throws IOException, InterruptedException {//map中的key为每一行的偏移量,value为每一行的内容//StringTokenizer是java.util中的类,根据空格分隔一行中的每一个单词StringTokenizer itr = new StringTokenizer(value.toString()) ;while(itr.hasMoreTokens()){word.set(itr.nextToken());context.write(word, one);}}}
编写reduce函数,继承Reducer类
package com.cjh.hadoop;import java.io.IOException;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;public class WordReducer extends Reducer<Text, IntWritable, Text, IntWritable>{private IntWritable result = new IntWritable() ; @Overrideprotected void reduce(Text key, Iterable<IntWritable> values,Context context)throws IOException, InterruptedException {int sum = 0 ; for (IntWritable val : values) {sum += val.get() ;}result.set(sum);context.write(key, result);}}
编写main函数
package com.cjh.hadoop;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class WordMain {public static void main(String[] args) throws Exception {//读取hadoop的配置文件,如site-core.xml等。Configuration conf = new Configuration();//GenericOptionsParser这个类的主要作用是解析用户指定的参数并且修改conf的配置信息String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();if(otherArgs.length != 2){System.err.println("Usage: wordcount <in> <out>");System.exit(2);}//一个MapReduce任务。第二个参数为Job的名称Job job = new Job(conf,"WordCount") ;//主类job.setJarByClass(WordMain.class);//设置mapper类job.setMapperClass(WordMapper.class);//设置合成类job.setCombinerClass(WordReducer.class);//设置reducer类job.setReducerClass(WordReducer.class);//设置输出key类型job.setOutputKeyClass(Text.class);//设置输出value类型job.setOutputValueClass(IntWritable.class);//输入路径FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//输出路径FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//等待job完成System.exit(job.waitForCompletion(true) ? 0 : 1);}}
遇到的问题:
1.3个类中导入的包一定要正确。
2.设置的output输出目录必须事先不存在,否则job提交时会报错。
0 0
- hadoop之"hello world"--wordCount
- Hadoop的“Hello world”---WordCount
- Hadoop之道--MapReduce之Hello World实例wordcount
- Hadoop之道--MapReduce之Hello World实例wordcount
- hadoop mapreduce hello world(wordcount)
- hadoop之hello world
- hello wordcount for hadoop
- Hadoop实战之三~ Hello World
- Hadoop hello world
- Hadoop Hello World
- Hadoop “Hello World” 示例
- Hadooop运行WordCount(Hello world)程序
- Hadoop之WordCount
- Hadoop之运行wordcount
- hadoop 之wordcount
- Hadoop 例子之 WordCount
- Hadoop之WordCount源代码
- hadoop之wordcount
- LPC1788FBD208笔记(002):SDRAM实验
- poj 2392 Space Elevator(多重背包)
- 关于信息登记的一点心得
- 容器适配器实现栈
- Linux Kernel资料
- hadoop之"hello world"--wordCount
- tcpdump抓包
- [Grails]使用Grails的Console执行简单的CRUD操作
- POJ 2455 二分最大流。
- SRM 605 D1 L2:AlienAndSetDiv1,DP,bitmask
- 字符串分割使用方法
- broken necklace
- linux的fnctl设置FD_CLOEXEC
- hdu Safecracker(回溯)