一些算法的MapReduce实现——矩阵分块乘法计算(2)
来源:互联网 发布:网络清洁器 编辑:程序博客网 时间:2024/05/17 11:04
Problem
M*N的矩阵A按s*t分块,N*P的矩阵B按t*v分块(A的块列数t与B的块行数t必须相同,分块才可相乘),
所以结果矩阵C为M*P,按s*v分块。
Input
<M><i><j><m_ij>,M为矩阵,i, j为矩阵M的元素下标,m_ij为矩阵M行列下标为i,j的非0元素
假设矩阵A,B为
输入的样例数据如下:
A,0,1,1.0A,0,2,2.0A,0,3,3.0A,0,4,4.0A,1,0,5.0A,1,1,6.0A,1,2,7.0A,1,3,8.0A,1,4,9.0B,0,1,1.0B,0,2,2.0B,1,0,3.0B,1,1,4.0B,1,2,5.0B,2,0,6.0B,2,1,7.0B,2,2,8.0B,3,0,9.0B,3,1,10.0B,3,2,11.0B,4,0,12.0B,4,1,13.0B,4,2,14.0
Output
<i><j><m_ij>,结果矩阵行列下标i, j,以及对应的值m_ij
上述样例数据的输出结果为:
0,0,90.00,1,100.00,2,110.01,0,240.01,1,275.01,2,310.0
Pseudocode
矩阵AB的分块乘法计算分两步map-reduce进行
第一步
map(key, value):
    // value is ("A", i, j, a_ij) or ("B", j, k, b_jk)
    if value[0] == "A":
        i = value[1]; j = value[2]; a_ij = value[3]
        for k_per_v = 0 to p/v - 1:
            emit((i/s, j/t, k_per_v), ("A", i%s, j%t, a_ij))
    else:
        j = value[1]; k = value[2]; b_jk = value[3]
        for i_per_s = 0 to m/s - 1:
            emit((i_per_s, j/t, k/v), ("B", j%t, k%v, b_jk))

reduce(key, values):
    // key is (i_per_s, j_per_t, k_per_v)
    // values is a list of ("A", i_mod_s, j_mod_t, a_ij) and ("B", j_mod_t, k_mod_v, b_jk)
    list_A = [(i_mod_s, j_mod_t, a_ij) for (M, i_mod_s, j_mod_t, a_ij) in values if M == "A"]
    list_B = [(j_mod_t, k_mod_v, b_jk) for (M, j_mod_t, k_mod_v, b_jk) in values if M == "B"]
    hash = {}
    for a in list_A:
        for b in list_B:
            if a[1] == b[0]:  // a.j_mod_t == b.j_mod_t
                hash[(a[0], b[1])] += a[2]*b[2]
    for {(i_mod_s, k_mod_v): v} in hash:
        emit((key[0]*s + i_mod_s, key[2]*v + k_mod_v), v)
第二步
map(key, value):
    emit(key, value)

reduce(key, values):
    result = 0
    for value in values:
        result += value
    emit(key, result)
Hadoop代码
import java.io.IOException;import java.util.*;import java.util.AbstractMap.SimpleEntry;import java.util.Map.Entry; import org.apache.hadoop.fs.Path;import org.apache.hadoop.conf.*;import org.apache.hadoop.io.*;import org.apache.hadoop.mapreduce.*;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class BlockMatrixMultiplication { public static class Map extends Mapper<LongWritable, Text, Text, Text> { public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); int m = Integer.parseInt(conf.get("m")); int p = Integer.parseInt(conf.get("p")); int s = Integer.parseInt(conf.get("s")); int t = Integer.parseInt(conf.get("t")); int v = Integer.parseInt(conf.get("v")); int mPerS = m/s; // Number of blocks in each column of A. int pPerV = p/v; // Number of blocks in each row of B. 
String line = value.toString(); String[] indicesAndValue = line.split(","); Text outputKey = new Text(); Text outputValue = new Text(); if (indicesAndValue[0].equals("A")) { int i = Integer.parseInt(indicesAndValue[1]); int j = Integer.parseInt(indicesAndValue[2]); for (int kPerV = 0; kPerV < pPerV; kPerV++) { outputKey.set(Integer.toString(i/s) + "," + Integer.toString(j/t) + "," + Integer.toString(kPerV)); outputValue.set("A," + Integer.toString(i%s) + "," + Integer.toString(j%t) + "," + indicesAndValue[3]); context.write(outputKey, outputValue); } } else { int j = Integer.parseInt(indicesAndValue[1]); int k = Integer.parseInt(indicesAndValue[2]); for (int iPerS = 0; iPerS < mPerS; iPerS++) { outputKey.set(Integer.toString(iPerS) + "," + Integer.toString(j/t) + "," + Integer.toString(k/v)); outputValue.set("B," + Integer.toString(j%t) + "," + Integer.toString(k%v) + "," + indicesAndValue[3]); context.write(outputKey, outputValue); } } } } public static class Reduce extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String[] value; ArrayList<Entry<String, Float>> listA = new ArrayList<Entry<String, Float>>(); ArrayList<Entry<String, Float>> listB = new ArrayList<Entry<String, Float>>(); for (Text val : values) { value = val.toString().split(","); if (value[0].equals("A")) { listA.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3]))); } else { listB.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3]))); } } String[] iModSAndJModT; String[] jModTAndKModV; float a_ij; float b_jk; String hashKey; HashMap<String, Float> hash = new HashMap<String, Float>(); for (Entry<String, Float> a : listA) { iModSAndJModT = a.getKey().split(","); a_ij = a.getValue(); for (Entry<String, Float> b : listB) { jModTAndKModV = b.getKey().split(","); b_jk = b.getValue(); if 
(iModSAndJModT[1].equals(jModTAndKModV[0])) { hashKey = iModSAndJModT[0] + "," + jModTAndKModV[1]; if (hash.containsKey(hashKey)) { hash.put(hashKey, hash.get(hashKey) + a_ij*b_jk); } else { hash.put(hashKey, a_ij*b_jk); } } } } String[] blockIndices = key.toString().split(","); String[] indices; String i; String k; Configuration conf = context.getConfiguration(); int s = Integer.parseInt(conf.get("s")); int v = Integer.parseInt(conf.get("v")); Text outputValue = new Text(); for (Entry<String, Float> entry : hash.entrySet()) { indices = entry.getKey().split(","); i = Integer.toString(Integer.parseInt(blockIndices[0])*s + Integer.parseInt(indices[0])); k = Integer.toString(Integer.parseInt(blockIndices[2])*v + Integer.parseInt(indices[1])); outputValue.set(i + "," + k + "," + Float.toString(entry.getValue())); context.write(null, outputValue); } } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // A is an m-by-n matrix; B is an n-by-p matrix. conf.set("m", "2"); conf.set("n", "5"); conf.set("p", "3"); conf.set("s", "2"); // Number of rows in a block in A. conf.set("t", "5"); // Number of columns in a block in A = number of rows in a block in B. conf.set("v", "3"); // Number of columns in a block in B. Job job = new Job(conf, "MatrixMatrixMultiplicationTwoStepsBlock"); job.setJarByClass(BlockMatrixMultiplication.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); }}
第二步map-reduce代码简单,在此省略
Analysis
上述第一步map-reduce过程中:
1、reduce job的数目 = 所有发送给reduce任务的unique keys 的数目,即(m*n*p) / (s*t*v)
2、矩阵A发送给每个reduce任务的元素个数为s*t(即A的一个分块的大小)
3、矩阵B发送给每个reduce任务的元素个数为t*v(即B的一个分块的大小)
communication cost:
1、The communication cost of the first Map tasks is O(m*n + n*p)(即读入A、B全部元素)
2、The communication cost of the first Reduce tasks is O(m*n*p/v + n*p*m/s)(A的每个元素被复制p/v份,B的每个元素被复制m/s份)
3、The communication cost of the second Map tasks is O(m*n*p/t)(第一步reduce输出的部分和总数至多为(m*p)*(n/t))
4、The communication cost of the second Reduce tasks is O(m*n*p/t)
如果s、t、v变大的话,那么矩阵被分割的块数就变少了,reduce任务的数目也随之变少,每个reduce处理的数据量将变大,因此需要更多的内存
Reference
翻译自 1、http://importantfish.com/block-matrix-multiplication-with-hadoop/
2、http://www.norstad.org/matrix-multiply/ 讲的十分详细的矩阵乘法计算
0 0
- 一些算法的MapReduce实现——矩阵分块乘法计算(2)
- 一些算法的MapReduce实现——矩阵分块乘法计算(1)
- 一些算法的MapReduce实现——矩阵-向量乘法实现
- 【甘道夫】Mapreduce实现矩阵乘法的算法思路
- 【甘道夫】Mapreduce实现矩阵乘法的算法思路
- Hadoop 稀疏矩阵乘法的MapReduce计算
- 一些算法的MapReduce实现——矩阵相乘一步实现
- 转置矩阵的分块并行乘法(C语言实现),计算矩阵C[rawn][rawn]=A[rawm][rawn]'*B[rawm][rawn],子块大小为S*T,其算法实现原理参加本代码的附件。
- MapReduce实现矩阵乘法
- Mapreduce实现矩阵乘法
- MapReduce实现矩阵乘法
- MapReduce的矩阵乘法的实现
- Hadoop MapReduce实现矩阵的乘法
- 矩阵乘法的MapReduce程序设计与实现
- 矩阵乘法的Mapreduce实现过程
- Hadoop-MapReduce下的PageRank 矩阵分块算法
- 矩阵乘法的算法实现
- 大矩阵的分块乘法及matlab实现
- XDU Problem 1255 - 数列游戏
- Git 常用指令详解
- Hibernate的generator属性
- 编程中犯的错误
- Hype Cycle (技术成熟度曲线)
- 一些算法的MapReduce实现——矩阵分块乘法计算(2)
- Debian 7下配置samba和Windows8共享
- HDOJ 1039 Easier Done Than Said?
- 黑马程序员---java学习笔记之泛型
- Yii tips 2
- 使用python的logging模块
- linux 常用命令
- java反射详解
- 彻底理解javascript的回调函数(推荐)