mapreduce编程实例(7)-求所有用户ID
来源:互联网 发布:深孔钻编程视频教程 编辑:程序博客网 时间:2024/05/17 08:05
在网站评论中,有些用户评论过多次,有些只评论过一次。我们需要求出所有评论过的用户(去重后每个用户只出现一次)。这相当于SQL语句中的 select distinct UserId from .....
代码如下:
package mrdp.ch3;import java.io.IOException;import java.util.Map;import mrdp.utils.MRDPUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class DistinctUserDriver {public static class SODistinctUserMapper extendsMapper<Object, Text, Text, NullWritable> {private Text outUserId = new Text();@Overridepublic void map(Object key, Text value, Context context)throws IOException, InterruptedException {// Parse the input into a nice map.Map<String, String> parsed = MRDPUtils.transformXmlToMap(value.toString());// Get the value for the UserId attributeString userId = parsed.get("UserId");// If it is null, skip this recordif (userId == null) {return;}// Otherwise, set our output key to the user's idoutUserId.set(userId);// Write the user's id with a null valuecontext.write(outUserId, NullWritable.get());}}public static class SODistinctUserReducer extendsReducer<Text, NullWritable, Text, NullWritable> {@Overridepublic void reduce(Text key, Iterable<NullWritable> values,Context context) throws IOException, InterruptedException {// Write the user's id with a null valuecontext.write(key, NullWritable.get());}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: UniqueUserCount <in> <out>");System.exit(2);}Job job = new Job(conf, "StackOverflow Distinct 
Users");job.setJarByClass(DistinctUserDriver.class);job.setMapperClass(SODistinctUserMapper.class);job.setCombinerClass(SODistinctUserReducer.class);job.setReducerClass(SODistinctUserReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(NullWritable.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));System.exit(job.waitForCompletion(true) ? 0 : 1);}}
这个非常简单:Mapper把UserId作为key输出(value为空),shuffle阶段会把相同的key归并到一起,所以Reducer对每个key只需输出一次,就得到了去重后的用户ID。
从这个例子可以引申到求一共有多少个不同的用户评论过(即去重后的用户总数):先用第一个作业对UserId去重,再用第二个作业对去重结果计数,第二步相当于又是一个wordcount,代码如下:
package mrdp.ch3;import java.io.IOException;import java.util.Map;import mrdp.utils.MRDPUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;import org.apache.hadoop.util.GenericOptionsParser;public class UniqueUserCount {public static class SODistinctUserMapper extendsMapper<Object, Text, Text, NullWritable> {private Text outUserId = new Text();@Overridepublic void map(Object key, Text value, Context context)throws IOException, InterruptedException {Map<String, String> parsed = MRDPUtils.transformXmlToMap(value.toString());String userId = parsed.get("UserId");if (userId == null) {return;}outUserId.set(userId);context.write(outUserId, NullWritable.get());}}public static class SODistinctUserReducer extendsReducer<Text, NullWritable, Text, NullWritable> {@Overridepublic void reduce(Text key, Iterable<NullWritable> values,Context context) throws IOException, InterruptedException {context.write(key, NullWritable.get());}}public static class SOUserCounterMapper extendsMapper<Text, NullWritable, Text, IntWritable> {private static final Text DUMMY = new Text("Total:");private static final IntWritable ONE = new IntWritable(1);@Overridepublic void map(Text key, NullWritable value, Context context)throws IOException, InterruptedException {context.write(DUMMY, ONE);}}public static void main(String[] args) throws Exception {Configuration conf = new 
Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: UniqueUserCount <in> <out>");System.exit(2);}Path tmpout = new Path(otherArgs[1] + "_tmp");FileSystem.get(new Configuration()).delete(tmpout, true);Path finalout = new Path(otherArgs[1]);Job job = new Job(conf, "StackOverflow Unique User Count");job.setJarByClass(UniqueUserCount.class);job.setMapperClass(SODistinctUserMapper.class);job.setCombinerClass(SODistinctUserReducer.class);job.setReducerClass(SODistinctUserReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(NullWritable.class);job.setOutputFormatClass(SequenceFileOutputFormat.class);job.setNumReduceTasks(1);FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, tmpout);boolean exitCode = job.waitForCompletion(true);if (exitCode) {job = new Job(conf, "Stack Overflow Unique User Count");job.setJarByClass(UniqueUserCount.class);job.setMapperClass(SOUserCounterMapper.class);job.setCombinerClass(IntSumReducer.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);job.setInputFormatClass(SequenceFileInputFormat.class);FileInputFormat.addInputPath(job, tmpout);FileOutputFormat.setOutputPath(job, finalout);exitCode = job.waitForCompletion(true);}System.exit(exitCode ? 0 : 1);}}
0 0
- mapreduce编程实例(7)-求所有用户ID
- MapReduce编程实例(一)-求平均数
- mapreduce编程实例(3)-求平均值
- mapreduce编程实例(3)-求平均值
- mapreduce编程实例(6)-求TOP 10
- MapReduce编程实例(一)-求平均数
- mapreduce编程实例(2)-求最大值和最小值
- mapreduce编程实例(4)-求中位数和标准差
- mapreduce编程实例(2)-求最大值和最小值
- mapreduce编程实例(4)-求中位数和标准差
- mapreduce编程:求平均值
- MapReduce编程实例
- mapreduce python编程实例
- MapReduce编程实例
- MapReduce WordCount编程实例
- MapReduce编程(四) 求均值
- octopy的MapReduce编程实例
- MapReduce编程实例(一)
- 经纬财富:湛江现货白银结算时间
- Android系统存在多个Launcher时,设置开机进入默认的Launcher
- 高考失利——学什么技术能高薪就业?
- 通信图
- HTTP基本认证(Basic Authentication)的JAVA示例
- mapreduce编程实例(7)-求所有用户ID
- SourceKitService Terminated Editor functionality temporarily limited.
- Facade模式
- 遇到 Form 性能问题怎么办 performance issue
- c++的重载、覆盖和遮蔽(隐藏)
- Android中使用代码截图的各种方法总结
- java socket实现全双工通信
- Struts2的Action中获取request,session,application对象
- 区位码、国标码、机内码