Hadoop: writing and testing a MapReduce program


1. In the Hadoop installation directory, start the NameNode, DataNode, ResourceManager, and NodeManager daemons

$ sbin/hadoop-daemon.sh start namenode

$ sbin/hadoop-daemon.sh start datanode

$ sbin/yarn-daemon.sh start resourcemanager

$ sbin/yarn-daemon.sh start nodemanager

Check that the daemons are running: $ jps
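If all four daemons came up, jps lists one JVM per daemon plus jps itself. The listing below is only illustrative; the process IDs will differ on your machine:

$ jps
2721 NameNode
2855 DataNode
3042 ResourceManager
3311 NodeManager
3450 Jps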

2. Write the MapReduce program

$ mkdir /usr/local/userapp
$ cd /usr/local/userapp
$ vim WordCount.java

package org.myorg;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class WordCount {

    // Mapper: split each input line into tokens and emit (word, 1) for each token.
    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts collected for each word.
    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Input and output paths are taken from the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
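The class above uses the classic org.apache.hadoop.mapred API, which still compiles and runs on Hadoop 2.6 but has long been deprecated. For comparison, here is a minimal sketch of the same job against the newer org.apache.hadoop.mapreduce API; the class name WordCountNewApi is an illustrative choice, not part of the original tutorial:

package org.myorg;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountNewApi {

    // Mapper: tokenize each line and emit (word, 1) through the Context.
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: the new API hands values in as an Iterable rather than an Iterator.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        // Job replaces JobConf/JobClient as the driver entry point.
        Job job = Job.getInstance(new Configuration(), "wordcount");
        job.setJarByClass(WordCountNewApi.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Either version is compiled, packaged, and run exactly as in steps 3 through 7 below.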


3. Compile the MapReduce program

root@ubuntu:/usr/local/userapp# javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common-2.6.0.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0.jar -d ./ WordCount.java
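If compilation succeeds, javac lays the class files out under a directory tree matching the package name, so the current directory should now contain something like this (the nested Map and Reduce classes get $-suffixed files):

$ ls org/myorg/
WordCount$Map.class  WordCount$Reduce.class  WordCount.class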

4. Package the classes into a jar

root@ubuntu:/usr/local# jar -cvf /usr/local/wordcount.jar -C userapp/ .
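Before submitting the jar, you can list its contents to confirm the classes were picked up; the output should look roughly like this:

root@ubuntu:/usr/local# jar -tf /usr/local/wordcount.jar
META-INF/
META-INF/MANIFEST.MF
org/
org/myorg/
org/myorg/WordCount.class
org/myorg/WordCount$Map.class
org/myorg/WordCount$Reduce.class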


5. Upload the test data

bin/hadoop dfs -put /usr/local/test.txt  /user/root/input/
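If the put fails because the target directory does not exist yet, create it first and retry. A quick way to produce a small test file is also shown; the sample text is just an assumption for this walkthrough:

$ echo "hello hadoop hello world" > /usr/local/test.txt
$ bin/hadoop dfs -mkdir -p /user/root/input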

6. Run the program

root@ubuntu:/usr/local/hadoop# bin/hadoop jar /usr/local/wordcount.jar org.myorg.WordCount /user/root/input output
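Note that MapReduce refuses to overwrite an existing output directory: if /user/root/output is left over from an earlier run, the job aborts with a FileAlreadyExistsException. Remove it before rerunning:

bin/hadoop dfs -rm -r /user/root/output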

7. View the results

bin/hadoop dfs -cat /user/root/output/*
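With the sample test.txt suggested in step 5, the output would be the following; each line is a word and its count separated by a tab, sorted by key:

hadoop	1
hello	2
world	1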

