Implementing QQ Friend Recommendation with Hadoop 2.7.1 MapReduce


1. Simulate a set of friend relationships:

aaa	bbb
ccc	ddd
eee	fff
ggg	hhh
bbb	zzz
eee	iii
(aaa and bbb are friends, ccc and ddd are friends, and so on. From the data it is easy to see that aaa-zzz and fff-iii should be recommended to each other: aaa and zzz share the common friend bbb, and fff and iii share the common friend eee.)
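To run the job against HDFS later on, the sample file has to be uploaded first. A minimal example, assuming the data is saved locally as friends.txt and the input directory is /qq/input (both names are placeholders, not from the original post):

    hdfs dfs -mkdir -p /qq/input
    hdfs dfs -put friends.txt /qq/input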

2. Feed the data into the mapper, with the two columns serving as key and value

package cn.nanda.QQ;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class QQMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] ss = StringUtils.split(line, "\t");
        // Emit the friendship in both directions so the reducer
        // receives the complete friend list for each user.
        context.write(new Text(ss[0]), new Text(ss[1]));
        context.write(new Text(ss[1]), new Text(ss[0]));
    }
}
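The split-and-swap logic of the mapper can be checked in plain Java without a cluster. The following is only an illustrative sketch; the class name MapperLogicDemo is made up, and it assumes commons-lang on the classpath just like the mapper itself:

package cn.nanda.QQ;

import org.apache.commons.lang.StringUtils;

public class MapperLogicDemo {
    public static void main(String[] args) {
        String line = "aaa\tbbb"; // one record from the sample data
        String[] ss = StringUtils.split(line, "\t");
        // The mapper writes the pair in both directions:
        System.out.println(ss[0] + "\t" + ss[1]); // aaa -> bbb
        System.out.println(ss[1] + "\t" + ss[0]); // bbb -> aaa
    }
}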


3. Write the reducer

package cn.nanda.QQ;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class QQReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Collect all friends of this user (the key) into a set.
        Set<String> set = new HashSet<String>();
        for (Text text : values) {
            set.add(text.toString());
        }
        // Debug output: print the friend list for this user.
        for (String friend : set) {
            System.out.println(friend);
        }
        // Any two distinct friends of the same user share a common
        // friend, so recommend them to each other (in both directions).
        if (set.size() > 1) {
            for (String name : set) {
                for (String other : set) {
                    if (!name.equals(other)) {
                        context.write(new Text(name), new Text(other));
                    }
                }
            }
        }
    }
}
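To trace the pairing step by hand: for the key bbb the reducer receives the friend set {aaa, zzz}, and every ordered pair of distinct members is written out. Here is a standalone sketch of just that loop (the class PairingDemo is hypothetical, not part of the original code):

package cn.nanda.QQ;

import java.util.HashSet;
import java.util.Set;

public class PairingDemo {
    public static void main(String[] args) {
        // Friends of the common user bbb in the sample data:
        Set<String> set = new HashSet<String>();
        set.add("aaa");
        set.add("zzz");
        // Every ordered pair of distinct friends is a recommendation:
        for (String name : set) {
            for (String other : set) {
                if (!name.equals(other)) {
                    System.out.println(name + "\t" + other);
                }
            }
        }
    }
}

Note one limitation of this simple approach: two users who are already direct friends can still be recommended to each other if they also share a third friend. A real recommender would need a second MapReduce pass that filters out existing friendships.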

4. Create a job and run the MapReduce

package cn.nanda.QQ;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class QQRunner extends Configured implements Tool {
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // To run the MapReduce job on an HDFS cluster, set the default
        // file system to the HDFS address and use HDFS paths, e.g.:
        // conf.set("fs.defaultFS", "hdfs://localhost:9000/");
        Job job = Job.getInstance(conf);
        job.setJarByClass(QQRunner.class);
        job.setMapperClass(QQMapper.class);
        job.setReducerClass(QQReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // If the output path already exists, delete it first;
        // otherwise the job would fail on startup.
        Path output = new Path(args[1]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileOutputFormat.setOutputPath(job, output);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new Configuration(), new QQRunner(), args);
        System.exit(run);
    }
}
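Once the three classes are packaged into a jar, the job can be launched with the standard hadoop jar command. The jar name and the HDFS paths below are placeholders:

    hadoop jar qq.jar cn.nanda.QQ.QQRunner /qq/input /qq/output

The recommendations end up in the output directory and can be inspected with hdfs dfs -cat /qq/output/part-r-00000.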

5. The output is:

aaa	zzz
zzz	aaa
fff	iii
iii	fff

Exactly as expected! Each recommendation appears in both directions because the reducer writes every ordered pair of friends.


