Hadoop Beginner Tutorial: A Small Example Program


Whether on WeChat or QQ, we often see a "people you may know" friend-recommendation feature. Under the hood, this feature is built on big-data processing. Let's look at a concrete implementation:
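The job below reads plain text in which each line records one existing friendship as a tab-separated pair of user names. A hypothetical sample (names invented for illustration) of a file under /usr/input/qq/ might look like this:

    tom	cat
    tom	hadoop
    cat	hive

From such pairs the job recommends, for every user, that user's friends to one another, since they all share a common friend.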

Add three classes under src. First, JobRun.java:

    package com.lftgb.mr;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class JobRun {

        public static void main(String[] args) {
            Configuration conf = new Configuration();
            // Point the client at the remote JobTracker and HDFS NameNode.
            conf.set("mapred.job.tracker", "192.168.152.128:9001");
            conf.set("fs.default.name", "hdfs://192.168.152.128:9000");
            // Jar that gets shipped to the cluster when submitting from Eclipse.
            conf.set("mapred.jar", "C:\\Users\\志鹏\\Desktop\\hadoop程序\\qq.jar");
            try {
                Job job = new Job(conf);
                job.setJobName("qq");
                job.setJarByClass(JobRun.class);
                job.setMapperClass(Test2Mapper.class);
                job.setReducerClass(Test2Reduce.class);
                // The mapper emits Text keys and Text values.
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(Text.class);
                // Directory (or file) holding the job's input data.
                FileInputFormat.addInputPath(job, new Path("/usr/input/qq/"));
                // Output directory; it must not exist before the job runs.
                FileOutputFormat.setOutputPath(job, new Path("/usr/output/qq"));
                System.exit(job.waitForCompletion(true) ? 0 : 1);
            } catch (IOException | ClassNotFoundException | InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
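If you would rather submit from the command line than run JobRun inside Eclipse, here is a minimal sketch, assuming the three classes are packed into qq.jar and copied to the cluster (the output directory must be deleted between runs):

    hadoop fs -rmr /usr/output/qq
    hadoop jar qq.jar com.lftgb.mr.JobRun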
Next, Test2Mapper.java (the file name must match the public class name):

    package com.lftgb.mr;

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class Test2Mapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds one friendship as a tab-separated pair.
            String line = value.toString();
            String[] ss = line.split("\t");
            // Emit the relation in both directions so the reducer sees
            // the complete friend list for every user.
            context.write(new Text(ss[0]), new Text(ss[1]));
            context.write(new Text(ss[1]), new Text(ss[0]));
        }
    }
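For the hypothetical sample line "tom	cat", this mapper emits two records, so the shuffle phase groups every user's complete friend list under that user's key:

    tom	cat
    cat	tom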
Finally, Test2Reduce.java:

    package com.lftgb.mr;

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class Test2Reduce extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Collect the distinct friends of this user (the key).
            Set<String> set = new HashSet<String>();
            for (Text t : values) {
                set.add(t.toString());
            }
            // Any two distinct friends of the same user share a common
            // friend, so recommend them to each other.
            if (set.size() > 1) {
                for (String name : set) {
                    for (String other : set) {
                        if (!name.equals(other)) {
                            context.write(new Text(name), new Text(other));
                        }
                    }
                }
            }
        }
    }
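Continuing the hypothetical sample: if the reducer receives the key tom with the friend set {cat, hadoop}, it writes the two recommendations below, because cat and hadoop share the common friend tom:

    cat	hadoop
    hadoop	cat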
With Eclipse and Hadoop working together like this, big-data processing jobs become much easier to write and debug. For a deeper dive, look out for my next post!


