Hadoop matrix multiplication with a single map/reduce
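In brief: the left matrix (matrix1) is shipped to every mapper through the DistributedCache and held in memory, while the right matrix (matrix2) is the job input. For input row i of matrix2, the mapper emits the partial product A[j][i] * B[i][k] keyed by the output cell (j, k), and the reducer sums these partial products:

C[j][k] = sum over i of A[j][i] * B[i][k]

so the whole product comes out of one map/reduce pass.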

package com.matrix;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.util.*;
/**************************************
 *
 * @author  Lincolnfather
 * @date    20131107
 * Hadoop matrix multiplication.
 * Tested on http://hadoop.nchc.org.tw/
 *
 */
public class MatrixMR {
    public static class MatrixMapper extends Mapper<Object, Text, MatrixPair, IntWritable> {

        // The left matrix (matrix1), loaded from the DistributedCache in setup().
        private List<List<Integer>> cache = new ArrayList<List<Integer>>();
        // Index of the current input row of the right matrix. This counter relies
        // on the right matrix arriving as a single split, with rows in file order.
        private int i = 0;

        @Override
        public void setup(Context context) throws IOException {
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(cacheFiles[0].toString())));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    // Each cached line is one row of matrix1, values separated by '#'.
                    List<Integer> is = new ArrayList<Integer>();
                    for (String str : line.split("#")) {
                        is.add(Integer.parseInt(str));
                    }
                    cache.add(is);
                }
            } finally {
                br.close();
            }
        }

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // One row of matrix2; row i of matrix2 pairs with column i of matrix1.
            String[] vals = value.toString().split("#");
            if (i < cache.get(0).size()) {
                for (int j = 0; j < cache.size(); j++) {
                    for (int k = 0; k < vals.length; k++) {
                        // Emit the partial product A[j][i] * B[i][k], keyed by the
                        // output cell (j, k); the reducer sums these into C[j][k].
                        MatrixPair mp = new MatrixPair();
                        mp.setRowindex(j);
                        mp.setColumnindex(k);
                        context.write(mp,
                                new IntWritable(cache.get(j).get(i) * Integer.parseInt(vals[k])));
                    }
                }
            }
            i++;
        }
    }
    public static class MatrixReducer extends Reducer<MatrixPair, IntWritable, Text, Text> {

        // Accumulates one output row at a time. This works because keys arrive
        // sorted by (row, column) and a single reduce task sees every cell.
        private StringBuffer sb = new StringBuffer();

        @Override
        public void reduce(MatrixPair key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            // Sum the partial products for output cell (rowindex, columnindex).
            int sum = 0;
            for (IntWritable i : value) {
                sum += i.get();
            }
            sb.append(sum);
            if (key.getColumnindex() == 4) {
                // Last column of the five-column right matrix: flush the finished row.
                context.write(new Text(sb.toString()), new Text(""));
                sb = new StringBuffer();
            } else {
                sb.append("#");
            }
        }
    }


    public static class MatrixPair implements WritableComparable<MatrixPair> {

        Integer rowindex;
        Integer columnindex;

        @Override
        public void readFields(DataInput read) throws IOException {
            rowindex = read.readInt();
            columnindex = read.readInt();
        }

        @Override
        public void write(DataOutput write) throws IOException {
            write.writeInt(rowindex);
            write.writeInt(columnindex);
        }

        @Override
        public int compareTo(MatrixPair o) {
            // Sort by row first, then by column, so the reducer sees each output
            // row's cells left to right.
            int cmp = rowindex.compareTo(o.getRowindex());
            return cmp != 0 ? cmp : columnindex.compareTo(o.getColumnindex());
        }

        @Override
        public int hashCode() {
            // Needed by the default HashPartitioner so equal cells reach the
            // same reducer if more than one reduce task is used.
            return rowindex * 163 + columnindex;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof MatrixPair)) return false;
            MatrixPair other = (MatrixPair) obj;
            return rowindex.equals(other.rowindex) && columnindex.equals(other.columnindex);
        }

        public Integer getRowindex() {
            return rowindex;
        }

        public void setRowindex(Integer rowindex) {
            this.rowindex = rowindex;
        }

        public Integer getColumnindex() {
            return columnindex;
        }

        public void setColumnindex(Integer columnindex) {
            this.columnindex = columnindex;
        }
    }
    public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: matrix multiply <in> <out>");
            System.exit(2);
        }
        // Ship matrix1 to every mapper via the DistributedCache; the job input
        // <in> is matrix2.
        DistributedCache.addCacheFile(new Path("hdfs://hadoop.nchc.org.tw/user/h3969/matrix1.txt").toUri(), conf);
        Job job = new Job(conf, "my matrix multiply");
        job.setJarByClass(MatrixMR.class);
        job.setMapperClass(MatrixMapper.class);
        job.setReducerClass(MatrixReducer.class);
        job.setMapOutputKeyClass(MatrixPair.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
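To run it on a cluster (the jar name and HDFS paths below are illustrative; matrix2.txt is the job input, while the cached matrix1.txt path is hardcoded in main(), so adjust it to your own HDFS location before packaging):

hadoop jar matrixmr.jar com.matrix.MatrixMR /user/h3969/matrix2.txt /user/h3969/matrix_out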

Data: matrix1.txt

3#4#1
2#4#8
1#2#7
5#6#5

Data: matrix2.txt

5#8#7#4#9
2#6#5#4#2
9#1#3#4#7
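
For reference, the product of the two sample matrices (4×3 times 3×5) worked out by hand, which is what the job's output rows should contain; e.g. the top-left entry is 3*5 + 4*2 + 1*9 = 32. Note the reducer's hardcoded columnindex == 4 assumes exactly this five-column right matrix:

32#49#44#32#42
90#48#58#56#82
72#27#38#40#62
82#81#80#64#92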


This algorithm is suited to multiplying one matrix of up to a few hundred million rows by another matrix of unrestricted size. To multiply two matrices of unrestricted size, the data must be split into blocks before it is added to the DistributedCache.
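A minimal sketch of that pre-split step, under the assumption that the left matrix is cut into row blocks on HDFS, one MatrixMR job is run per block (each block cached as its matrix1), and the per-block outputs are concatenated in block order. The MatrixSplitter class, its arguments, and the ".blockN" naming are hypothetical, not part of the original code:

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MatrixSplitter {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path(args[0]);                 // e.g. matrix1.txt on HDFS (hypothetical)
        int rowsPerBlock = Integer.parseInt(args[1]); // rows per cached block (hypothetical)
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
        int row = 0, block = 0;
        FSDataOutputStream out = fs.create(new Path(src + ".block" + block));
        String line;
        while ((line = br.readLine()) != null) {
            if (row > 0 && row % rowsPerBlock == 0) {
                // Current block is full: close it and start the next block file.
                out.close();
                out = fs.create(new Path(src + ".block" + (++block)));
            }
            out.writeBytes(line + "\n");
            row++;
        }
        out.close();
        br.close();
    }
}

Because each block is a band of rows of the left matrix, its product with the full right matrix is the corresponding band of rows of the final result, so concatenating the per-block job outputs in block order reassembles the full product.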