MapReduce Implementations of Some Algorithms: Block Matrix Multiplication (2)


Problem

An m*n matrix A is partitioned into blocks of size s*t, and an n*p matrix B is partitioned into blocks of size t*v.

Consequently, the product C = AB is an m*p matrix partitioned into blocks of size s*v, and each block of C can be assembled from products of whole blocks of A and B.
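Written block-wise (the block indices I, J, K below are my own notation, not the original post's): A forms an (m/s) x (n/t) grid of s*t blocks A_IJ, B forms an (n/t) x (p/v) grid of t*v blocks B_JK, and each s*v block of the result is

    C_IK = sum over J = 0 .. n/t - 1 of A_IJ * B_JK

The first MapReduce pass below computes the individual block products A_IJ * B_JK, and the second pass sums them over J.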

Input

<M><i><j><m_ij>, where M identifies the matrix (A or B), i and j are the row and column indices of an element of M, and m_ij is the non-zero element of M at row i, column j.

 

Suppose matrices A and B are the following (reconstructed from the sample data; zero entries are not listed in the input):

A = | 0  1  2  3  4 |
    | 5  6  7  8  9 |

B = |  0   1   2 |
    |  3   4   5 |
    |  6   7   8 |
    |  9  10  11 |
    | 12  13  14 |

The sample input data is as follows:

A,0,1,1.0
A,0,2,2.0
A,0,3,3.0
A,0,4,4.0
A,1,0,5.0
A,1,1,6.0
A,1,2,7.0
A,1,3,8.0
A,1,4,9.0
B,0,1,1.0
B,0,2,2.0
B,1,0,3.0
B,1,1,4.0
B,1,2,5.0
B,2,0,6.0
B,2,1,7.0
B,2,2,8.0
B,3,0,9.0
B,3,1,10.0
B,3,2,11.0
B,4,0,12.0
B,4,1,13.0
B,4,2,14.0


Output

<i><j><m_ij>, where i and j are the row and column indices in the result matrix and m_ij is the corresponding value.

The output for the sample data above is:

0,0,90.0
0,1,100.0
0,2,110.0
1,0,240.0
1,1,275.0
1,2,310.0
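As a quick check, the entry 0,0,90.0 is the inner product of row 0 of A with column 0 of B: 0*0 + 1*3 + 2*6 + 3*9 + 4*12 = 90.0.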


Pseudocode

The block multiplication of matrices A and B is carried out in two MapReduce passes.

Step 1

map(key, value):
    // value is ("A", i, j, a_ij) or ("B", j, k, b_jk)
    if value[0] == "A":
        i = value[1]
        j = value[2]
        a_ij = value[3]
        for k_per_v = 0 to p/v - 1:
            emit((i/s, j/t, k_per_v), ("A", i%s, j%t, a_ij))
    else:
        j = value[1]
        k = value[2]
        b_jk = value[3]
        for i_per_s = 0 to m/s - 1:
            emit((i_per_s, j/t, k/v), ("B", j%t, k%v, b_jk))

reduce(key, values):
    // key is (i_per_s, j_per_t, k_per_v)
    // values is a list of ("A", i_mod_s, j_mod_t, a_ij) and ("B", j_mod_t, k_mod_v, b_jk)
    list_A = [(i_mod_s, j_mod_t, a_ij) for (M, i_mod_s, j_mod_t, a_ij) in values if M == "A"]
    list_B = [(j_mod_t, k_mod_v, b_jk) for (M, j_mod_t, k_mod_v, b_jk) in values if M == "B"]
    hash = {}   // missing entries are treated as 0
    for a in list_A:
        for b in list_B:
            if a[1] == b[0]:   // a.j_mod_t == b.j_mod_t
                hash[(a[0], b[1])] += a[2]*b[2]
    for ((i_mod_s, k_mod_v), val) in hash:
        emit((key[0]*s + i_mod_s, key[2]*v + k_mod_v), val)
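To make the key layout concrete: with the sample parameters m=2, n=5, p=3 and block sizes s=2, t=5, v=3, there is only one block in each dimension (m/s = n/t = p/v = 1), so every record maps to the single key (0, 0, 0). For example, the record A,1,4,9.0 is emitted once as ((0,0,0), ("A",1,4,9.0)) and B,4,2,14.0 as ((0,0,0), ("B",4,2,14.0)); with more blocks, each A record would instead be replicated p/v times and each B record m/s times.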


 

Step 2

map(key, value):
    emit(key, value)

reduce(key, values):
    result = 0
    for value in values:
        result += value
    emit(key, result)


 

Hadoop Code

import java.io.IOException;
import java.util.*;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class BlockMatrixMultiplication {

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            int m = Integer.parseInt(conf.get("m"));
            int p = Integer.parseInt(conf.get("p"));
            int s = Integer.parseInt(conf.get("s"));
            int t = Integer.parseInt(conf.get("t"));
            int v = Integer.parseInt(conf.get("v"));
            int mPerS = m/s; // Number of blocks in each column of A.
            int pPerV = p/v; // Number of blocks in each row of B.
            String line = value.toString();
            String[] indicesAndValue = line.split(",");
            Text outputKey = new Text();
            Text outputValue = new Text();
            if (indicesAndValue[0].equals("A")) {
                int i = Integer.parseInt(indicesAndValue[1]);
                int j = Integer.parseInt(indicesAndValue[2]);
                for (int kPerV = 0; kPerV < pPerV; kPerV++) {
                    outputKey.set(Integer.toString(i/s) + "," + Integer.toString(j/t) + "," + Integer.toString(kPerV));
                    outputValue.set("A," + Integer.toString(i%s) + "," + Integer.toString(j%t) + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            } else {
                int j = Integer.parseInt(indicesAndValue[1]);
                int k = Integer.parseInt(indicesAndValue[2]);
                for (int iPerS = 0; iPerS < mPerS; iPerS++) {
                    outputKey.set(Integer.toString(iPerS) + "," + Integer.toString(j/t) + "," + Integer.toString(k/v));
                    outputValue.set("B," + Integer.toString(j%t) + "," + Integer.toString(k%v) + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            String[] value;
            ArrayList<Entry<String, Float>> listA = new ArrayList<Entry<String, Float>>();
            ArrayList<Entry<String, Float>> listB = new ArrayList<Entry<String, Float>>();
            for (Text val : values) {
                value = val.toString().split(",");
                if (value[0].equals("A")) {
                    listA.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3])));
                } else {
                    listB.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3])));
                }
            }
            String[] iModSAndJModT;
            String[] jModTAndKModV;
            float a_ij;
            float b_jk;
            String hashKey;
            HashMap<String, Float> hash = new HashMap<String, Float>();
            for (Entry<String, Float> a : listA) {
                iModSAndJModT = a.getKey().split(",");
                a_ij = a.getValue();
                for (Entry<String, Float> b : listB) {
                    jModTAndKModV = b.getKey().split(",");
                    b_jk = b.getValue();
                    if (iModSAndJModT[1].equals(jModTAndKModV[0])) {
                        hashKey = iModSAndJModT[0] + "," + jModTAndKModV[1];
                        if (hash.containsKey(hashKey)) {
                            hash.put(hashKey, hash.get(hashKey) + a_ij*b_jk);
                        } else {
                            hash.put(hashKey, a_ij*b_jk);
                        }
                    }
                }
            }
            String[] blockIndices = key.toString().split(",");
            String[] indices;
            String i;
            String k;
            Configuration conf = context.getConfiguration();
            int s = Integer.parseInt(conf.get("s"));
            int v = Integer.parseInt(conf.get("v"));
            Text outputValue = new Text();
            for (Entry<String, Float> entry : hash.entrySet()) {
                indices = entry.getKey().split(",");
                i = Integer.toString(Integer.parseInt(blockIndices[0])*s + Integer.parseInt(indices[0]));
                k = Integer.toString(Integer.parseInt(blockIndices[2])*v + Integer.parseInt(indices[1]));
                outputValue.set(i + "," + k + "," + Float.toString(entry.getValue()));
                context.write(null, outputValue);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A is an m-by-n matrix; B is an n-by-p matrix.
        conf.set("m", "2");
        conf.set("n", "5");
        conf.set("p", "3");
        conf.set("s", "2"); // Number of rows in a block in A.
        conf.set("t", "5"); // Number of columns in a block in A = number of rows in a block in B.
        conf.set("v", "3"); // Number of columns in a block in B.

        Job job = new Job(conf, "MatrixMatrixMultiplicationTwoStepsBlock");
        job.setJarByClass(BlockMatrixMultiplication.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
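A few points worth noting about this job: the matrix and block dimensions m, n, p, s, t, v are passed through the Configuration set in main, the input and output directories come from args[0] and args[1], and the reducer writes each result with a null key so that only the "i,k,value" string appears in the output file, which is the record format the second pass then reads.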


 

The MapReduce code for the second step is simple and the original post omits it; a sketch follows below.
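For completeness, here is a minimal sketch of what that second job could look like, assuming the first job's output lines have the form "i,k,partialProduct" as produced by the code above. The class name BlockMatrixSum and the job name are placeholders of mine, not code from the original post.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class BlockMatrixSum {

    public static class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each line produced by the first job looks like "i,k,partialProduct".
            String[] indicesAndValue = value.toString().split(",");
            Text outputKey = new Text(indicesAndValue[0] + "," + indicesAndValue[1]);
            FloatWritable outputValue = new FloatWritable(Float.parseFloat(indicesAndValue[2]));
            context.write(outputKey, outputValue);
        }
    }

    public static class Reduce extends Reducer<Text, FloatWritable, Text, Text> {
        public void reduce(Text key, Iterable<FloatWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the partial block products that contribute to one (i,k) cell of C.
            float result = 0.0f;
            for (FloatWritable value : values) {
                result += value.get();
            }
            // Write "i,k,value" with a null key, matching the first job's output format.
            context.write(null, new Text(key.toString() + "," + Float.toString(result)));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = new Job(conf, "MatrixMatrixMultiplicationTwoStepsBlockSum");
        job.setJarByClass(BlockMatrixSum.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FloatWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}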


Analysis

In the first MapReduce pass above (worked numbers for the sample follow this list):

1. The number of reduce groups equals the number of unique keys sent to the reduce tasks, i.e. (m*n*p) / (s*t*v).
2. Each reducer receives s*t elements of matrix A.
3. Each reducer receives t*v elements of matrix B.
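With the sample matrices (m=2, n=5, p=3) and block sizes (s=2, t=5, v=3), that gives (2*5*3)/(2*5*3) = 1 reduce key, which receives at most s*t = 10 elements of A (9 in the sample, since a_00 = 0 is not stored) and at most t*v = 15 elements of B (14 in the sample).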

Communication cost (the formulas below are reconstructed from the counts above; with the sample dimensions they are worked out after this list):
1. The communication cost of the first Map tasks is O(mn + np), the total number of stored elements of A and B read as input.
2. The communication cost of the first Reduce tasks is O(mnp/v + mnp/s), since each element of A is replicated to p/v reducers and each element of B to m/s reducers.
3. The communication cost of the second Map tasks is O(mnp/t), since each of the mnp/(stv) first-pass reducers emits at most s*v partial sums.
4. The communication cost of the second Reduce tasks is likewise O(mnp/t).
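Plugging in the sample dimensions: the first Map cost is at most mn + np = 10 + 15 = 25 records, the first Reduce cost is mnp/v + mnp/s = 10 + 15 = 25, and the second Map and Reduce each handle at most mnp/t = 30/5 = 6 records, one partial sum per cell of the 2x3 result.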


If s, t, and v are made larger, the matrices are split into fewer blocks, so there are fewer reduce tasks; each reducer then processes more data and therefore needs more memory.

 

Reference

Translated from: 1. http://importantfish.com/block-matrix-multiplication-with-hadoop/

2. http://www.norstad.org/matrix-multiply/ — a very detailed treatment of matrix multiplication
