MapReduce之单词（字母）个数的统计

来源：互联网　发布：手游日常数据分析　编辑：程序博客网　时间：2024/06/05 07:12

package com.hadoop.study;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Hadoop MapReduce example that counts how many times each character occurs
 * in the input text.
 *
 * <p>The mapper emits {@code (character, 1)} for every Unicode code point of
 * every input line, and the reducer sums those ones per character.
 *
 * <p>Usage: {@code WordCount [inputPath [outputPath]]}. When a path argument
 * is omitted, the corresponding built-in default path is used.
 *
 * @author 戴桥冰 (2015-10-06 10:33:54)
 */
public class WordCount {

    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "/opt/data/test.txt";

    /** Output directory used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "/opt/data/hadoop/test/out";

    /**
     * Map phase: splits each input line into single characters and emits
     * {@code (character, 1)} for each of them.
     */
    static class MapperCount extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        /** Constant count of one, shared by every emitted pair. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reusable output key, overwritten for every emitted character. */
        private final Text text = new Text();

        /**
         * Emits one {@code (character, 1)} pair per code point of the line.
         *
         * <p>The line is walked by Unicode code point rather than with
         * {@code String.split("")}: splitting on the empty pattern yields a
         * single empty token for an empty line (which would be counted as a
         * bogus empty key) and cuts surrogate pairs in half. Whitespace
         * characters inside a line are still emitted as keys.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context sink for the emitted {@code (character, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String lineValue = value.toString();

            int index = 0;
            while (index < lineValue.length()) {
                // A supplementary character occupies two chars; keep them together.
                int width = Character.charCount(lineValue.codePointAt(index));
                text.set(lineValue.substring(index, index + width));
                context.write(text, ONE);
                index += width;
            }
        }
    }

    /**
     * Reduce phase: adds up all counts received for one key. Because it only
     * sums its inputs, it is also registered as the job's combiner.
     */
    static class ReduceCount extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reusable output value holding the sum for the current key. */
        private final IntWritable result = new IntWritable();

        /**
         * Sums the counts of a single key and emits {@code (key, sum)}.
         *
         * @param keys    the character being counted
         * @param values  the partial counts collected for that character
         * @param context sink for the emitted {@code (key, sum)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text keys, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }

            result.set(sum);
            context.write(keys, result);
        }
    }

    /**
     * Configures the job, submits it, waits for completion and exits with
     * status 0 on success or 1 on failure.
     *
     * @param args optional {@code [inputPath [outputPath]]}; missing entries
     *             fall back to the built-in default paths
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Load the configuration and create the named job through the factory
        // method (the Job constructors are deprecated).
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wordcount");

        // 1. Class used to locate the jar that contains the job.
        job.setJarByClass(WordCount.class);

        // 2. Mapper, combiner and reducer classes. Summation is associative and
        //    commutative, so the reducer can pre-aggregate on the map side.
        job.setMapperClass(MapperCount.class);
        job.setCombinerClass(ReduceCount.class);
        job.setReducerClass(ReduceCount.class);

        // 3. Input file and output directory.
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // 4. Key and value types of the job output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 5. Submit the job and wait, printing progress on the client.
        boolean isSuccess = job.waitForCompletion(true);

        // Report the job outcome through the process exit status.
        System.exit(isSuccess ? 0 : 1);
    }
}

0 0