WordCountMapReduce Code

### WordCountMapReduce 1

package com.myblue.myhdfs;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMapReduce {

    // Mapper
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // input line (key is the byte offset of the line)
            System.out.println(key.get());
            String lineValue = value.toString();
            String[] splits = lineValue.split("\t");
            Text mapOutputKey = new Text();                     // output key
            LongWritable mapOutputValue = new LongWritable(1);  // output value, always 1 in this example
            for (String s : splits) {
                mapOutputKey.set(s);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // Reducer
    public static class WordCountReducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {

            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            LongWritable outputValue = new LongWritable();
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    public static void main(String[] args) throws Exception {
        // hard-coded HDFS input/output paths (override any command-line arguments)
        args = new String[]{"/input", "/output"};
        Configuration conf = new Configuration();

        // create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountMapReduce.class);

        // input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // output path (delete it first if it already exists)
        Path outPath = new Path(args[1]);
        FileSystem dfs = FileSystem.get(conf);
        if (dfs.exists(outPath)) {
            dfs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // reducer
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // submit the job
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
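
Because the reducer only sums LongWritable counts, the same class can also be reused as a combiner to pre-aggregate on the map side and cut shuffle traffic. This is an optional addition, not part of the original listing; a minimal sketch of the one extra driver call, placed next to job.setReducerClass(...):

        // optional: pre-aggregate counts map-side; safe here because summing is associative
        job.setCombinerClass(WordCountReducer.class);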

### WordCountMapReduce 2

package com.myblue.myhdfs;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce word count, driven through the Tool / ToolRunner interface
 *
 * @author beifeng
 */
public class WordCountMapReduce2 extends Configured implements Tool {

    // step 1: Map Class
    /**
     * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
     */
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // line value
            String lineValue = value.toString();
            // split on whitespace
            // String[] strs = lineValue.split(" ");
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            // iterate over the tokens
            while (stringTokenizer.hasMoreTokens()) {
                // get word value
                String wordValue = stringTokenizer.nextToken();
                // set key
                mapOutputKey.set(wordValue);
                // output
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // step 2: Reduce Class
    /**
     * public class Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
     */
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // running total
            int sum = 0;
            // iterate over all counts for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            // set value
            outputValue.set(sum);
            // output
            context.write(key, outputValue);
        }
    }

    // step 3: Driver, assembles the job
    public int run(String[] args) throws Exception {
        // 1: get configuration (populated by ToolRunner)
        Configuration configuration = getConf();
        // 2: create Job
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        // jar to run
        job.setJarByClass(this.getClass());
        // 3: set job
        // input -> map -> reduce -> output
        // 3.1: input
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        // 3.2: map
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 3.3: reduce
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 3.4: output (delete the output path first if it exists)
        Path outPath = new Path(args[1]);
        FileSystem dfs = FileSystem.get(configuration);
        if (dfs.exists(outPath)) {
            dfs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        // 4: submit job
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    // step 4: run program
    public static void main(String[] args) throws Exception {
        // 1: get configuration
        Configuration configuration = new Configuration();
        // int status = new WordCountMapReduce2().run(args);
        int status = ToolRunner.run(configuration,
                new WordCountMapReduce2(),
                args);
        System.exit(status);
    }

}
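
The point of extending Configured and launching through ToolRunner is that generic options (for example -D key=value pairs on the command line) are parsed for you and end up in the Configuration returned by getConf(). A minimal sketch of a setup() method that could be added to WordCountMapper to read such a property; the property name wordcount.case.sensitive is purely an illustrative assumption, not something the original code defines:

        // read a job property that could be supplied via ToolRunner,
        // e.g. -D wordcount.case.sensitive=false (hypothetical property name)
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            boolean caseSensitive = context.getConfiguration()
                    .getBoolean("wordcount.case.sensitive", true);
            // ... use the flag when building mapOutputKey ...
        }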

### WordCountMapReduce 3

package bigdata.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountMR extends Configured implements Tool {

    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text mapOutputKey = new Text();
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println("key of mapInput:" + key);
            System.out.println("value of mapInput:" + value);
            String[] strs = line.split("\t");

            for (String str : strs) {
                mapOutputKey.set(str);
                context.write(mapOutputKey, mapOutputValue);
            }
            System.out.println(mapOutputKey);
            System.out.println("======= separator =======");
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable outputValue = new IntWritable(0);

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // debug output: show the list of counts grouped under this key
            System.out.print("key:" + key + "\t" + "values: list< ");
            int sum = 0;

            for (IntWritable value : values) {
                System.out.print(value + ",");
                sum += value.get();
            }
            System.out.print(" >" + "\n");
            outputValue.set(sum);

            context.write(key, outputValue);
        }
    }

    // custom partitioner: assumes the keys are numeric strings (as in the sort.txt test data);
    // keys 0-6 go to reducer 0, keys 7-9 to reducer 1, everything else to reducer 2
    public static class myPartition extends Partitioner<Text, IntWritable> {

        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            String str = key.toString();
            int mokey = Integer.valueOf(str);
            if (mokey >= 0 && mokey < 7) {
                return 0;
            } else if (mokey >= 7 && mokey <= 9) {
                return 1;
            } else {
                return 2;
            }
        }
    }

    public int run(String[] args) throws Exception {

        // use the configuration prepared by ToolRunner so generic options are honoured
        Configuration config = getConf();

        Job job = Job.getInstance(config, this.getClass().getSimpleName());

        job.setJarByClass(getClass());

        Path inPath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inPath);

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // three reduce tasks, matching the three partitions returned by myPartition
        job.setNumReduceTasks(3);
        job.setPartitionerClass(myPartition.class);

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path outPath = new Path(args[1]);
        FileSystem fs = outPath.getFileSystem(config);

        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        FileOutputFormat.setOutputPath(job, outPath);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // hard-coded local test paths (override any command-line arguments)
        args = new String[] {
                // "hdfs://apache.bigdata.com:8020/input/wc.txt",
                // "hdfs://apache.bigdata.com:8020/output"
                "E:\\sort.txt", "E:\\mapreuceout" };

        int status = ToolRunner.run(conf, new WordCountMR(), args);

        System.exit(status);
    }

}
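
Note that myPartition parses the key as an integer, so it only works when the keys are numeric (as in the sort.txt test data above); for ordinary word keys it would throw a NumberFormatException. A hash-based sketch that handles arbitrary Text keys, mirroring the behaviour of Hadoop's default HashPartitioner (this class is an illustration, not part of the original code):

    // hypothetical alternative: partition arbitrary word keys by hash across the 3 reducers
    public static class WordHashPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }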