mapreduce实例

来源:互联网 发布:对比度算法 编辑:程序博客网 时间:2024/05/29 11:52

1.统计单词个数

(1)统计样本:word.txt(hadoop fs -put word.txt /word)

number linux hadoop word count
linux number hadoop word count
linux hadoop eclipse word count
linux hadoop word java count
eclipse
java map reduce

(2)WordMap.java

package com.test.org;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;


//例:number linux hadoop word count

public class WordMap extends Mapper<Object, Text, Text, IntWritable>{

//每次传入一行数据,输入类型为object,text-->0,number linux hadoop word count

 @Override

protected void map(Object key, Text value,Context context)

throws IOException, InterruptedException {

//以空格“ ”切分字符,{number,linux,hadoop,word,count}==>lines

String[] lines=value.toString().split(" ");

for (String word : lines) {

context.write(new Text(word), new IntWritable(1));

/*输出类型为text,intwritable

*(number,1)

*(linux,1)

*(hadoop,1)

*(word,1)

*count,1)

*/

}

}

}

(3)WordReduce.java

package com.test.org;

 

import java.io.IOException;

 

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

 //<linux,{1,1,1,1,}>

public class WordReduce extends Reducer<Text, IntWritable, Text,IntWritable>{

@Override

protected void reduce(Text key, Iterable<IntWritable> values,

Context context) throws IOException, InterruptedException {

int sum=0;  //计数器

for (IntWritable count : values) {

sum+=count.get();

}

context.write(key, new IntWritable(sum)); //<linux,4>

 }

}

 

 

(4)WordCount.java

package com.test.org;

 

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 

public class WordCount {

    /**
     * Driver: wires WordMap/WordReduce into a job and submits it.
     * args[0] = HDFS input path, args[1] = HDFS output path (must not exist).
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        // Null check must come FIRST: the original tested args.length before
        // args == null, which would throw NPE instead of printing the message.
        if (args == null || args.length != 2) {
            System.out.println("input error");
            // Non-zero exit code so callers/scripts can detect the failure.
            System.exit(1);
        }
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        // Set the driver class (for jar lookup) and the map/reduce implementations.
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordMap.class);
        job.setReducerClass(WordReduce.class);
        // Input and output paths come from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Declare the final output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Propagate job success/failure as the process exit code; the original
        // discarded the boolean, so a failed job still exited 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

2.以“序号 单词”输出

package com.test.word;

 

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 

public class SingleNum {

public static class NumMap extends Mapper<Object, Text , Text,IntWritable>{

@Override

protected void map(Object key, Text value, Context context)

throws IOException, InterruptedException {

String[] lines=value.toString().split(" ");

for (String word : lines) {

//不需要计数,故不设置value值

context.write(new Text(word),new IntWritable());

}

}

//设置输出的<key,value>的数据类型为intwritable,text

public static class NumReduce extends Reducer<Text,IntWritable, IntWritable,Text>{

int number=1;//序号值

@Override

protected void reduce(Text key, Iterable<IntWritable> values,

Context context ) throws IOException, InterruptedException {

context.write(new IntWritable(number), key);

number++;

}

}

public static void main(String []args) throws IOException, ClassNotFoundException, InterruptedException{

if (args.length!=2||args==null) {

System.out.println("error!");

System.exit(0);

}

Configuration configuration=new Configuration();

@SuppressWarnings("deprecation")

Job job=new Job(configuration,"SingleNum");

job.setJarByClass(SingleNum.class);

job.setMapperClass(NumMap.class);

job.setReducerClass(NumReduce.class);

FileInputFormat.setInputPaths(job, new Path(args[0]));

FileOutputFormat.setOutputPath(job, new Path(args[1]));


//因为mapper输出的数据类型与reduce的数据类型不同,故需要设置mapper的key与value值

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setOutputKeyClass(IntWritable.class);

job.setOutputValueClass(Text.class);

job.waitForCompletion(true);

}

}

}

 

3.计算平均分

(1)样本:grade.txt

小红    89
小明    78
小明    90
小红    79
小明    98
小明    84
灵儿    86
灵儿    78
灵儿    96 


(2)AvgGrade.java

package com.test.word;

 

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 

public class AvgGrade {

public static class AvgMap extends Mapper<Object, Text, Text, IntWritable>{

@Override

protected void map(Object key, Text value, Context context)

throws IOException, InterruptedException {

//按Tab键切分数据

String[] lines=value.toString().split("\t");

//lines[0]代表姓名,lines[1]代表各科成绩

int grade=Integer.parseInt(lines[1]);

context.write(new Text(lines[0]),new IntWritable(grade));

}

}

//输出的数据类型为text,doublewritable

public static class AvgReduce extends Reducer<Text, IntWritable,Text, DoubleWritable>{

@Override

protected void reduce(Text key, Iterable<IntWritable> values,

Context context) throws IOException, InterruptedException {

double sum=0;//计算总成绩

int count=0;//计数器

for (IntWritable word : values) {

sum+=word.get();

count++;

}

context.write(key,new DoubleWritable(sum/count));

}

}

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{

if(args.length!=2||args==null){

System.out.println("error");

System.exit(0);

}

Configuration configuration=new Configuration();

@SuppressWarnings("deprecation")

Job job=new Job(configuration, "AvgGrade");

job.setJarByClass(AvgGrade.class);

job.setMapperClass(AvgMap.class);

job.setReducerClass(AvgReduce.class);

FileInputFormat.setInputPaths(job,new Path(args[0]));

FileOutputFormat.setOutputPath(job, new Path(args[1]));

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(DoubleWritable.class);

job.waitForCompletion(true);

}

}

 

原创粉丝点击