Hadoop 2.6 使用Map Reduce实现矩阵相乘2 矩阵相乘
来源:互联网 发布:上古时代网络用语 编辑:程序博客网 时间:2024/04/30 11:16
项目地址:https://github.com/tudoupaisimalingshu/hadoop_matrix
一、首先将右侧矩阵进行转置(转置的实现参见上一篇《Hadoop 2.6 使用Map Reduce实现矩阵相乘1 矩阵转置》)
二、然后进行相乘运算
1、将右侧转置矩阵缓存到Hadoop中
2、对左侧矩阵进行map操作
将左侧矩阵的每一行line,进行拆分,拆分成列(分量)数组,然后与缓存中的每一行进行相乘运算,然后将结果和对应的位置写回
3、对结果进行拼接,形成结果矩阵的物理存储
package hadoop;import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.net.URI;import java.net.URISyntaxException;import java.util.ArrayList;import java.util.List;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.Reducer.Context;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;import hadoop.Step1.Mapper1;import hadoop.Step1.Reducer1;public class Step2 {public static class Mapper2 extends Mapper<LongWritable,Text,Text,Text>{private Text outKey = new Text();private Text outValue = new Text();private List<String> cacheList = new ArrayList<String>();/*** * 将保存右侧矩阵的文件缓存到内存中,每一行为一个字符串,是所有行构成list */@Overrideprotected void setup(Context context)throws IOException, InterruptedException {FileReader fr = new FileReader("matrix2");//从缓存中拿到matrix2(main方法中设置)BufferedReader br = new BufferedReader(fr);String line = null;while((line = br.readLine()) != null){cacheList.add(line);}fr.close();br.close();}/*左侧矩阵逻辑形式 * 12-20 * 334-3 * -2023 * 53-12 * -4202 * 左侧矩阵物理形式 * 11_1,2_2,3_-2,4_0 * 21_3,2_3,3_4,4_-3 * 31_-2,2_0,3_2,4_3 * 41_5,2_3,3_-1,4_2 * 51_-4,2_2,3_0,4_2 * * 右侧矩阵(已转置)物理形式 * 13_0,1_0,4_-2,2_123_1,4_2,2_3,1_334_-1,1_-1,3_4,2_541_2,3_-1,4_1,2_-254_2,3_2,1_-3,2_-1key: "1"value: "11_1,2_2,3_-2,4_0" * */@Overrideprotected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {String[] rowAndline = value.toString().split("\t");//获得行号//rowAndline : {"1","1_1,2_2,3_-2,4_0"}String row_matrix1 = rowAndline[0];//row_matrix1 :"1"String[] column_value_array_matrix1 = 
rowAndline[1].split(",");//获得各列//rowAndline[1] : "1_1,2_2,3_-2,4_0"//column_value_array_matrix1 : {"1_1","2_2","3_-2","4_0"}for(String line : cacheList)// 以line:"34_-1,1_-1,3_4,2_5"为例{String[] rowAndline2 = line.toString().split("\t");//rowAndline2 : {"3","4_-1,1_-1,3_4,2_5"}String row_matrix2 = rowAndline2[0];//获得转置矩阵line行的行号(原右矩阵的列号)String[] column_value_array_matrix2 = rowAndline2[1].split(",");//rowAndline2[1] : "4_-1,1_-1,3_4,2_5"//column_value_array_matrix2 : {"4_-1","1,-1","3_4","2_5"}int result = 0;//保存成绩累加结果for(String column_value_matrix1 : column_value_array_matrix1)//对于左侧矩阵line行的每一列(分量) "1_1","2_2","3_-2","4_0"{String column_maxtrix1 = column_value_matrix1.split("_")[0];//获得列号String value_matrix1 = column_value_matrix1.split("_")[1];//获得该列的值for(String column_value_matrix2 : column_value_array_matrix2)//对于右侧矩阵的line行的每一列(分量) "4_-1","1,-1","3_4","2_5"{String column_maxtrix2 = column_value_matrix2.split("_")[0];//获得列号String value_matrix2 = column_value_matrix2.split("_")[1];//获得该列的值if(column_maxtrix2.equals(column_maxtrix1))//这里也体现了为什么要标明列号,只有列号明确且相等,才证明是同一个位置的分量{result += Integer.valueOf(value_matrix1) * Integer.valueOf(value_matrix2);//result += 1 * (-1)//result += 2 * 5//result += -2 * 4//result += 0 * (-1)}}}outKey.set(row_matrix1);//输出的key值设置为左侧矩阵的行号outValue.set(row_matrix2 + "_" +result);//输出的value值设置为右侧转置矩阵的行号(实际矩阵的列号)_该位置的值context.write(outKey, outValue);//("1","3_1")}//("1","2_7")("1,"3_1")("1","2_4")("1","4_0")("1","5_9")//("2","1_9")...//....}}public static class Reducer2 extends Reducer<Text,Text,Text,Text>{private Text outKey = new Text();private Text outValue = new Text();/** * 将map产生的key-value对进行组合,拼接成结果矩阵的物理形式 * ("1","2_7")("1,"3_1")("1","2_4")("1","4_0")("1","5_9") * ("2","1_9")... * ... 
* 对于key值相同的元素("1","2_7")("1,"3_1")("1","2_4")("1","4_0")("1","5_9") * 会将其组合 * key : "1" * values : {"2_7","3_1","2_4","4_0","5_9"} * */@Overrideprotected void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException {StringBuilder sb = new StringBuilder();for(Text text : values){sb.append(text + ",");}// sb : "2_7,3_1,2_4,4_0,5_9,"String line = "";if(sb.toString().endsWith(",")){line = sb.substring(0,sb.length()-1);}//line :"2_7,3_1,2_4,4_0,5_9"outKey.set(key);outValue.set(line);context.write(outKey, outValue);// ("1","2_7,3_1,2_4,4_0,5_9")}}private static final String INPATH = "input/matrix.txt";private static final String OUTPATH = "hdfs://pc1:9000/output/step2_3";private static final String CACHE = "hdfs://pc1:9000/cache/matrix.txt";private static final String HDFS = "hdfs://pc1:9000";public void run() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException { Configuration conf = new Configuration(); //String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); //String[] otherArgs = {"hdfs://pc1:9000/input/chenjie.txt","hdfs://pc1:9000/output/out4"}; String[] otherArgs = {INPATH,OUTPATH}; //这里需要配置参数即输入和输出的HDFS的文件路径 if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } //conf.set("fs.defaultFS",HDFS); // JobConf conf1 = new JobConf(WordCount.class); Job job = new Job(conf, "step2");//Job(Configuration conf, String jobName) 设置job名称和 job.setJarByClass(Step2.class); job.setMapperClass(Mapper2.class); //为job设置Mapper类 //job.setCombinerClass(IntSumReducer.class); //为job设置Combiner类 job.setReducerClass(Reducer2.class); //为job设置Reduce类 job.addCacheArchive(new URI(CACHE + "#matrix2")); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); //设置输出key的类型 job.setOutputValueClass(Text.class);// 设置输出value的类型 job.setOutputFormatClass(SequenceFileOutputFormat.class); 
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); //为map-reduce任务设置InputFormat实现类 设置输入路径 FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//为map-reduce任务设置OutputFormat实现类 设置输出路径 System.exit(job.waitForCompletion(true) ? 0 : 1);/*Configuration conf = new Configuration();conf.set("fs.defaultFS",HDFS);Job job = Job.getInstance(conf,"step1");job.setJarByClass(Step1.class);job.setMapperClass(Mapper1.class);job.setReducerClass(Reducer1.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileSystem fs = FileSystem.get(conf);Path inPath = new Path(INPATH);if(fs.exists(inPath)){//FileInputFormat.addInputPath(conf, inPath);}Path outPath = new Path(OUTPATH);if(fs.exists(outPath)){fs.delete(outPath, true);}*/}public static void main(String[] args){try {new Step2().run();} catch (ClassNotFoundException | IOException | InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (URISyntaxException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}
三、运行结果
使用hadoop fs -text 路径 查看结果
阅读全文
0 0
- Hadoop 2.6 使用Map Reduce实现矩阵相乘2 矩阵相乘
- Hadoop 2.6 使用Map Reduce实现矩阵相乘1 矩阵转置
- Hadoop 实现矩阵相乘
- hadoop矩阵相乘简单实现
- Hadoop矩阵相乘
- MapReduce实现矩阵相乘
- Python实现矩阵相乘
- java实现矩阵相乘
- mapreduce实现矩阵相乘
- python 实现矩阵相乘
- MapReduce实现矩阵相乘
- mpi实现矩阵相乘
- Java实现矩阵相乘
- 矩阵相乘实现
- MapReduce实现矩阵相乘
- 二维矩阵实现矩阵相乘
- 使用MapReduce实现矩阵向量相乘
- 矩阵相乘2
- Nodemcu上使用Mongoose OS:用户自定义配置
- 结构体
- C++中compare函数的使用
- NIO 03 DatagramChannel
- java中轻量级数据库ORM框架:JOOQ
- Hadoop 2.6 使用Map Reduce实现矩阵相乘2 矩阵相乘
- iOS开发之沙盒机制
- 托管语言
- Alertdilog自定义样式
- test
- CSAPP LAB2 BombLab
- 移动站如何进行seo优化?
- 安卓Studio 依赖、权限
- linux屏幕键盘