WordCountMapReduce Code

### WordCountMapReduce 1

package com.myblue.myhdfs;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMapReduce {

    // Mapper
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // input line (key is the byte offset of the line)
            System.out.println(key.get());
            String lineValue = value.toString();
            String[] splits = lineValue.split("\t");
            Text mapOutputKey = new Text();                     // output key
            LongWritable mapOutputValue = new LongWritable(1);  // output value, always 1 in this example
            for (String s : splits) {
                mapOutputKey.set(s);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // Reducer
    public static class WordCountReducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {

            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            LongWritable outputValue = new LongWritable();
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    public static void main(String[] args) throws Exception {
        // hard-coded HDFS input/output paths (override any command-line arguments)
        args = new String[]{"/input", "/output"};
        Configuration conf = new Configuration();

        // create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountMapReduce.class);

        // input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // output path (delete it first if it already exists)
        Path outPath = new Path(args[1]);
        FileSystem dfs = FileSystem.get(conf);
        if (dfs.exists(outPath)) {
            dfs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // reducer
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // submit the job
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
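
Because the reducer only sums LongWritable counts, the same class can also be reused as a combiner to pre-aggregate on the map side and cut shuffle traffic. This is an optional addition, not part of the original listing; a minimal sketch of the one extra driver call, placed next to job.setReducerClass(...):

        // optional: pre-aggregate counts map-side; safe here because summing is associative
        job.setCombinerClass(WordCountReducer.class);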

### WordCountMapReduce 2

package com.myblue.myhdfs;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce word count, driven through the Tool / ToolRunner interface
 *
 * @author beifeng
 */
public class WordCountMapReduce2 extends Configured implements Tool {

    // step 1: Map Class
    /**
     * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
     */
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // line value
            String lineValue = value.toString();
            // split on whitespace
            // String[] strs = lineValue.split(" ");
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            // iterate over the tokens
            while (stringTokenizer.hasMoreTokens()) {
                // get word value
                String wordValue = stringTokenizer.nextToken();
                // set key
                mapOutputKey.set(wordValue);
                // output
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // step 2: Reduce Class
    /**
     * public class Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
     */
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // running total
            int sum = 0;
            // iterate over all counts for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            // set value
            outputValue.set(sum);
            // output
            context.write(key, outputValue);
        }
    }

    // step 3: Driver, assembles the job
    public int run(String[] args) throws Exception {
        // 1: get configuration (populated by ToolRunner)
        Configuration configuration = getConf();
        // 2: create Job
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        // jar to run
        job.setJarByClass(this.getClass());
        // 3: set job
        // input -> map -> reduce -> output
        // 3.1: input
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        // 3.2: map
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 3.3: reduce
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 3.4: output (delete the output path first if it exists)
        Path outPath = new Path(args[1]);
        FileSystem dfs = FileSystem.get(configuration);
        if (dfs.exists(outPath)) {
            dfs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        // 4: submit job
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    // step 4: run program
    public static void main(String[] args) throws Exception {
        // 1: get configuration
        Configuration configuration = new Configuration();
        // int status = new WordCountMapReduce2().run(args);
        int status = ToolRunner.run(configuration,
                new WordCountMapReduce2(),
                args);
        System.exit(status);
    }

}
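
The point of extending Configured and launching through ToolRunner is that generic options (for example -D key=value pairs on the command line) are parsed for you and end up in the Configuration returned by getConf(). A minimal sketch of a setup() method that could be added to WordCountMapper to read such a property; the property name wordcount.case.sensitive is purely an illustrative assumption, not something the original code defines:

        // read a job property that could be supplied via ToolRunner,
        // e.g. -D wordcount.case.sensitive=false (hypothetical property name)
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            boolean caseSensitive = context.getConfiguration()
                    .getBoolean("wordcount.case.sensitive", true);
            // ... use the flag when building mapOutputKey ...
        }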

### WordCountMapReduce 3

package bigdata.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountMR extends Configured implements Tool {

    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text mapOutputKey = new Text();
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println("key of mapInput:" + key);
            System.out.println("value of mapInput:" + value);
            String[] strs = line.split("\t");

            for (String str : strs) {
                mapOutputKey.set(str);
                context.write(mapOutputKey, mapOutputValue);
            }
            System.out.println(mapOutputKey);
            System.out.println("======= separator =======");
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable outputValue = new IntWritable(0);

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // debug output: show the list of counts grouped under this key
            System.out.print("key:" + key + "\t" + "values: list< ");
            int sum = 0;

            for (IntWritable value : values) {
                System.out.print(value + ",");
                sum += value.get();
            }
            System.out.print(" >" + "\n");
            outputValue.set(sum);

            context.write(key, outputValue);
        }
    }

    // custom partitioner: assumes the keys are numeric strings (as in the sort.txt test data);
    // keys 0-6 go to reducer 0, keys 7-9 to reducer 1, everything else to reducer 2
    public static class myPartition extends Partitioner<Text, IntWritable> {

        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            String str = key.toString();
            int mokey = Integer.valueOf(str);
            if (mokey >= 0 && mokey < 7) {
                return 0;
            } else if (mokey >= 7 && mokey <= 9) {
                return 1;
            } else {
                return 2;
            }
        }
    }

    public int run(String[] args) throws Exception {

        // use the configuration prepared by ToolRunner so generic options are honoured
        Configuration config = getConf();

        Job job = Job.getInstance(config, this.getClass().getSimpleName());

        job.setJarByClass(getClass());

        Path inPath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inPath);

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // three reduce tasks, matching the three partitions returned by myPartition
        job.setNumReduceTasks(3);
        job.setPartitionerClass(myPartition.class);

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path outPath = new Path(args[1]);
        FileSystem fs = outPath.getFileSystem(config);

        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        FileOutputFormat.setOutputPath(job, outPath);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // hard-coded local test paths (override any command-line arguments)
        args = new String[] {
                // "hdfs://apache.bigdata.com:8020/input/wc.txt",
                // "hdfs://apache.bigdata.com:8020/output"
                "E:\\sort.txt", "E:\\mapreuceout" };

        int status = ToolRunner.run(conf, new WordCountMR(), args);

        System.exit(status);
    }

}
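
Note that myPartition parses the key as an integer, so it only works when the keys are numeric (as in the sort.txt test data above); for ordinary word keys it would throw a NumberFormatException. A hash-based sketch that handles arbitrary Text keys, mirroring the behaviour of Hadoop's default HashPartitioner (this class is an illustration, not part of the original code):

    // hypothetical alternative: partition arbitrary word keys by hash across the 3 reducers
    public static class WordHashPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }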