刘刚 Map-side Join(MapReduce 映射端连接)实现

来源:互联网 发布:linux创建新用户命令 编辑:程序博客网 时间:2024/05/17 02:51
package MapJoin;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Map-side join of an "orders" input against a small "customers" table that is shipped to every
 * map task through the distributed cache.
 *
 * <p>Customer lines are comma separated with at least three fields; the first field is the join
 * key and the next two are carried into the output. Order lines are comma separated with at least
 * four fields; the first field is the join key and the next three are carried into the output.
 *
 * <p>Each emitted key has the form {@code key,cust1,cust2,order1,order2,order3}; the emitted value
 * is an empty {@link Text}.
 *
 * <p>Usage: {@code MapSideJoin <orders input> <output dir> [customers cache file URI]}.
 */
public class MapSideJoin extends Configured implements Tool {

    /** Cache file used when no third command-line argument is supplied. */
    private static final String DEFAULT_CUSTOMERS_URI =
            "hdfs://lx-zhujiming:9000/user/......./tmp/input/Customers.csv";

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "MapSideJoin";

    /** Mapper that loads the customers table once and joins each order line against it. */
    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        /** Minimum number of comma-separated fields a customer line must have. */
        private static final int CUSTOMER_FIELDS = 3;

        /** Minimum number of comma-separated fields an order line must have. */
        private static final int ORDER_FIELDS = 4;

        /** Join key mapped to the two customer fields already joined with a comma. */
        private final Map<String, String> customersMap = new HashMap<String, String>();

        /** Reused output key, to avoid allocating one object per record. */
        private final Text oKey = new Text();

        /** Reused empty output value, to avoid allocating one object per record. */
        private final Text emptyValue = new Text();

        /**
         * Runs once before any call to {@code map}: loads the first distributed-cache file into
         * {@link #customersMap}.
         *
         * @throws IOException if no cache file is registered or the file cannot be read. The
         *     failure is propagated on purpose: continuing with an empty table would silently
         *     produce a wrong join.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            URI[] cacheFiles = context.getCacheFiles();
            if (cacheFiles == null || cacheFiles.length == 0) {
                throw new IOException(
                        "No distributed-cache file registered; the customers table is required");
            }
            loadCustomers(localNameOf(cacheFiles[0]), context);
        }

        /**
         * Returns the name under which a cache file is reachable from the task's working
         * directory: the URI fragment when one is given ({@code ...#alias}), otherwise the last
         * path component. This follows the pattern used in the Hadoop MapReduce tutorial; the
         * original {@code hdfs://} URI cannot be opened with local file APIs.
         */
        private static String localNameOf(URI cacheFile) {
            String fragment = cacheFile.getFragment();
            if (fragment != null && !fragment.isEmpty()) {
                return fragment;
            }
            return new Path(cacheFile.getPath()).getName();
        }

        /**
         * Reads the localized customers file line by line into {@link #customersMap}.
         * Blank lines are ignored; lines with fewer than {@link #CUSTOMER_FIELDS} fields are
         * skipped and counted under {@code MALFORMED_CUSTOMER_LINES}.
         */
        private void loadCustomers(String localFileName, Context context) throws IOException {
            // Decode as UTF-8 explicitly: Text always decodes as UTF-8, so the lookup keys must
            // be decoded the same way regardless of the platform default charset.
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(new FileInputStream(localFileName), "UTF-8"));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    if (line.trim().isEmpty()) {
                        continue;
                    }
                    // Limit -1 keeps trailing empty fields so "id,a," still has three fields.
                    String[] cust = line.split(",", -1);
                    if (cust.length < CUSTOMER_FIELDS) {
                        context.getCounter(COUNTER_GROUP, "MALFORMED_CUSTOMER_LINES").increment(1);
                        continue;
                    }
                    customersMap.put(cust[0], cust[1] + "," + cust[2]);
                }
            } finally {
                in.close();
            }
        }

        /**
         * Joins one order line with its customer and emits the combined record as the key.
         * Order lines with fewer than {@link #ORDER_FIELDS} fields, and orders whose key has no
         * customer entry, are dropped and counted instead of crashing the task or emitting the
         * text {@code "null"}.
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] order = value.toString().split(",", -1);
            if (order.length < ORDER_FIELDS) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_ORDER_LINES").increment(1);
                return;
            }
            String customer = customersMap.get(order[0]);
            if (customer == null) {
                context.getCounter(COUNTER_GROUP, "ORDERS_WITHOUT_CUSTOMER").increment(1);
                return;
            }
            String orderTail = order[1] + "," + order[2] + "," + order[3];
            oKey.set(order[0] + "," + customer + "," + orderTail);
            context.write(oKey, emptyValue);
        }
    }

    /**
     * Configures and runs the join job.
     *
     * @param args {@code <orders input> <output dir> [customers cache file URI]}; when the third
     *     argument is absent, {@link #DEFAULT_CUSTOMERS_URI} is used
     * @return 0 on success, 1 if the job failed, 2 if the arguments are invalid
     */
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "MultiTableJoin");
        job.setJarByClass(MapSideJoin.class);
        job.setMapperClass(MapClass.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Kept so that generic options are still honoured if run() is called directly rather
        // than through ToolRunner.
        String[] otherArgs =
                new GenericOptionsParser(job.getConfiguration(), args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println(
                    "Usage: MapSideJoin <orders input> <output dir> [customers cache file URI]");
            ToolRunner.printGenericCommandUsage(System.err);
            return 2;
        }

        // Register exactly one cache file: the mapper reads only the first entry, so a second
        // entry with the same file name would never be used.
        String customersUri = otherArgs.length > 2 ? otherArgs[2] : DEFAULT_CUSTOMERS_URI;
        job.addCacheFile(new URI(customersUri));

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Command-line entry point; exits with the code returned by {@link #run(String[])}. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new MapSideJoin(), args);
        System.exit(res);
    }
}


0 0